Extending test-client-custom-summary to try e_book_client_get_contacts_uids()
[platform/upstream/evolution-data-server.git] / camel / camel-mime-utils.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  *  Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
4  *
5  *  Authors: Michael Zucchi <notzed@ximian.com>
6  *           Jeffrey Stedfast <fejj@ximian.com>
7  *
8  * This program is free software; you can redistribute it and/or
9  * modify it under the terms of version 2 of the GNU Lesser General Public
10  * License as published by the Free Software Foundation.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15  * General Public License for more details.
16  *
17  * You should have received a copy of the GNU Lesser General Public
18  * License along with this program; if not, write to the
19  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20  * Boston, MA 02110-1301, USA.
21  */
22
23 #ifdef HAVE_CONFIG_H
24 #include <config.h>
25 #endif
26
27 #include <stdio.h>
28 #include <stdlib.h>
29 #include <string.h>
30 #include <sys/types.h>
31 #include <sys/param.h>  /* for MAXHOSTNAMELEN */
32 #include <sys/stat.h>
33 #include <unistd.h>
34 #include <regex.h>
35 #include <fcntl.h>
36 #include <errno.h>
37 #include <ctype.h>
38 #include <time.h>
39
40 #ifndef MAXHOSTNAMELEN
41 #define MAXHOSTNAMELEN 1024
42 #endif
43
44 #include "camel-charset-map.h"
45 #include "camel-iconv.h"
46 #include "camel-mime-utils.h"
47 #include "camel-net-utils.h"
48 #ifdef G_OS_WIN32
49 #include <winsock2.h>
50 #include <ws2tcpip.h>
51 #ifdef HAVE_WSPIAPI_H
52 #include <wspiapi.h>
53 #endif
54 #endif
55 #include "camel-utf8.h"
56
#ifdef G_OS_WIN32
/* Drop any gmtime_r that is already defined as a macro so that the
 * emulation below is the one that gets used. */
#ifdef gmtime_r
#undef gmtime_r
#endif

/* The gmtime() in Microsoft's C library is MT-safe */
/* Emulates gmtime_r() on top of gmtime(): copies the broken-down time
 * into *tmp and evaluates to tmp, or to 0 when gmtime() returns NULL.
 * Note that gmtime() is called twice and that both macro arguments are
 * expanded more than once, so they must not have side effects. */
#define gmtime_r(tp,tmp) (gmtime(tp)?(*(tmp)=*gmtime(tp),(tmp)):0)
#endif
65
/* for all non-essential warnings ... */
/* Expands to nothing, so any statement wrapped in w() is compiled out. */
#define w(x)

/* Debug-output wrappers: both expand to nothing, so statements wrapped
 * in d() or d2() are compiled out as well. */
#define d(x)
#define d2(x)
71
/**
 * camel_mktime_utc:
 * @tm: the #tm to convert to a calendar time representation
 *
 * Works like mktime(3), except that the broken-down time in @tm is
 * taken to be in UTC rather than in the local timezone. As with
 * mktime(3), the fields of @tm are normalised by the call.
 *
 * Returns: the calendar time representation of @tm
 *
 * Since: 3.4
 **/
time_t
camel_mktime_utc (struct tm *tm)
{
        time_t result;

        /* let mktime() work out by itself whether DST is in effect */
        tm->tm_isdst = -1;
        result = mktime (tm);

        /* mktime() interpreted @tm as local time; undo the zone offset */
#if defined (HAVE_TM_GMTOFF)
        result += tm->tm_gmtoff;
#elif defined (HAVE_TIMEZONE)
#if defined (HAVE_ALTZONE)
        result -= (tm->tm_isdst > 0) ? altzone : timezone;
#else
        /* without altzone, DST is taken to be one hour ahead */
        result -= (tm->tm_isdst > 0) ? (timezone - 3600) : timezone;
#endif
#endif

        return result;
}
105
106 /**
107  * camel_localtime_with_offset:
108  * @tt: the #time_t to convert
109  * @tm: the #tm to store the result in
110  * @offset: the #gint to store the offset in
111  *
112  * Converts the calendar time representation @tt to a broken-down
113  * time representation, stored in @tm, and provides the offset in
114  * seconds from UTC time, stored in @offset.
115  **/
116 void
117 camel_localtime_with_offset (time_t tt,
118                              struct tm *tm,
119                              gint *offset)
120 {
121         localtime_r (&tt, tm);
122
123 #if defined (HAVE_TM_GMTOFF)
124         *offset = tm->tm_gmtoff;
125 #elif defined (HAVE_TIMEZONE)
126         if (tm->tm_isdst > 0) {
127 #if defined (HAVE_ALTZONE)
128                 *offset = -altzone;
129 #else
130                 *offset = -(timezone - 3600);
131 #endif
132         } else
133                 *offset = -timezone;
134 #endif
135 }
136
/* Maps a 6-bit value to its uuencoded character: the value is offset
 * by ' ' (0x20), except that zero is written as '`' instead of a
 * space. */
#define CAMEL_UUENCODE_CHAR(c)  ((c) ? (c) + ' ' : '`')
/* Inverse mapping: removes the ' ' offset and keeps the low six bits
 * (077 octal), so both ' ' and '`' yield 0. */
#define CAMEL_UUDECODE_CHAR(c)  (((c) - ' ') & 077)

/* Upper-case hexadecimal digits, indexed by nibble value (0-15). */
static const guchar tohex[16] = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
};
144
145 /**
146  * camel_uuencode_close:
147  * @in: input stream
148  * @len: input stream length
149  * @out: output stream
150  * @uubuf: temporary buffer of 60 bytes
151  * @state: holds the number of bits that are stored in @save
152  * @save: leftover bits that have not yet been encoded
153  *
154  * Uuencodes a chunk of data. Call this when finished encoding data
155  * with camel_uuencode_step() to flush off the last little bit.
156  *
157  * Returns: the number of bytes encoded
158  **/
159 gsize
160 camel_uuencode_close (guchar *in,
161                       gsize len,
162                       guchar *out,
163                       guchar *uubuf,
164                       gint *state,
165                       guint32 *save)
166 {
167         register guchar *outptr, *bufptr;
168         register guint32 saved;
169         gint uulen, uufill, i;
170
171         outptr = out;
172
173         if (len > 0)
174                 outptr += camel_uuencode_step (in, len, out, uubuf, state, save);
175
176         uufill = 0;
177
178         saved = *save;
179         i = *state & 0xff;
180         uulen = (*state >> 8) & 0xff;
181
182         bufptr = uubuf + ((uulen / 3) * 4);
183
184         if (i > 0) {
185                 while (i < 3) {
186                         saved <<= 8;
187                         uufill++;
188                         i++;
189                 }
190
191                 if (i == 3) {
192                         /* convert 3 normal bytes into 4 uuencoded bytes */
193                         guchar b0, b1, b2;
194
195                         b0 = (saved >> 16) & 0xff;
196                         b1 = (saved >> 8) & 0xff;
197                         b2 = saved & 0xff;
198
199                         *bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
200                         *bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
201                         *bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
202                         *bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
203
204                         i = 0;
205                         saved = 0;
206                         uulen += 3;
207                 }
208         }
209
210         if (uulen > 0) {
211                 gint cplen = ((uulen / 3) * 4);
212
213                 *outptr++ = CAMEL_UUENCODE_CHAR ((uulen - uufill) & 0xff);
214                 memcpy (outptr, uubuf, cplen);
215                 outptr += cplen;
216                 *outptr++ = '\n';
217                 uulen = 0;
218         }
219
220         *outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
221         *outptr++ = '\n';
222
223         *save = 0;
224         *state = 0;
225
226         return outptr - out;
227 }
228
229 /**
230  * camel_uuencode_step:
231  * @in: input stream
232  * @len: input stream length
233  * @out: output stream
234  * @uubuf: temporary buffer of 60 bytes
235  * @state: holds the number of bits that are stored in @save
236  * @save: leftover bits that have not yet been encoded
237  *
238  * Uuencodes a chunk of data. Performs an 'encode step', only encodes
239  * blocks of 45 characters to the output at a time, saves left-over
240  * state in @uubuf, @state and @save (initialize to 0 on first
241  * invocation).
242  *
243  * Returns: the number of bytes encoded
244  **/
245 gsize
246 camel_uuencode_step (guchar *in,
247                      gsize len,
248                      guchar *out,
249                      guchar *uubuf,
250                      gint *state,
251                      guint32 *save)
252 {
253         register guchar *inptr, *outptr, *bufptr;
254         guchar b0, b1, b2, *inend;
255         register guint32 saved;
256         gint uulen, i;
257
258         if (len == 0)
259                 return 0;
260
261         inend = in + len;
262         outptr = out;
263         inptr = in;
264
265         saved = *save;
266         i = *state & 0xff;
267         uulen = (*state >> 8) & 0xff;
268
269         if ((len + uulen) < 45) {
270                 /* not enough input to write a full uuencoded line */
271                 bufptr = uubuf + ((uulen / 3) * 4);
272         } else {
273                 bufptr = outptr + 1;
274
275                 if (uulen > 0) {
276                         /* copy the previous call's tmpbuf to outbuf */
277                         memcpy (bufptr, uubuf, ((uulen / 3) * 4));
278                         bufptr += ((uulen / 3) * 4);
279                 }
280         }
281
282         if (i == 2) {
283                 b0 = (saved >> 8) & 0xff;
284                 b1 = saved & 0xff;
285                 saved = 0;
286                 i = 0;
287
288                 goto skip2;
289         } else if (i == 1) {
290                 if ((inptr + 2) < inend) {
291                         b0 = saved & 0xff;
292                         saved = 0;
293                         i = 0;
294
295                         goto skip1;
296                 }
297
298                 while (inptr < inend) {
299                         saved = (saved << 8) | *inptr++;
300                         i++;
301                 }
302         }
303
304         while (inptr < inend) {
305                 while (uulen < 45 && (inptr + 3) <= inend) {
306                         b0 = *inptr++;
307                 skip1:
308                         b1 = *inptr++;
309                 skip2:
310                         b2 = *inptr++;
311
312                         /* convert 3 normal bytes into 4 uuencoded bytes */
313                         *bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
314                         *bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
315                         *bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
316                         *bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
317
318                         uulen += 3;
319                 }
320
321                 if (uulen >= 45) {
322                         *outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
323                         outptr += ((45 / 3) * 4) + 1;
324
325                         *outptr++ = '\n';
326                         uulen = 0;
327
328                         if ((inptr + 45) <= inend) {
329                                 /* we have enough input to output another full line */
330                                 bufptr = outptr + 1;
331                         } else {
332                                 bufptr = uubuf;
333                         }
334                 } else {
335                         /* not enough input to continue... */
336                         for (i = 0, saved = 0; inptr < inend; i++)
337                                 saved = (saved << 8) | *inptr++;
338                 }
339         }
340
341         *save = saved;
342         *state = ((uulen & 0xff) << 8) | (i & 0xff);
343
344         return outptr - out;
345 }
346
347 /**
348  * camel_uudecode_step:
349  * @in: input stream
350  * @inlen: max length of data to decode
351  * @out: output stream
352  * @state: holds the number of bits that are stored in @save
353  * @save: leftover bits that have not yet been decoded
354  *
355  * Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
356  * uuencoded data. Assumes the "begin mode filename" line has
357  * been stripped off.
358  *
359  * Returns: the number of bytes decoded
360  **/
361 gsize
362 camel_uudecode_step (guchar *in,
363                      gsize len,
364                      guchar *out,
365                      gint *state,
366                      guint32 *save)
367 {
368         register guchar *inptr, *outptr;
369         guchar *inend, ch;
370         register guint32 saved;
371         gboolean last_was_eoln;
372         gint uulen, i;
373
374         if (*state & CAMEL_UUDECODE_STATE_END)
375                 return 0;
376
377         saved = *save;
378         i = *state & 0xff;
379         uulen = (*state >> 8) & 0xff;
380         if (uulen == 0)
381                 last_was_eoln = TRUE;
382         else
383                 last_was_eoln = FALSE;
384
385         inend = in + len;
386         outptr = out;
387         inptr = in;
388
389         while (inptr < inend) {
390                 if (*inptr == '\n') {
391                         last_was_eoln = TRUE;
392
393                         inptr++;
394                         continue;
395                 } else if (!uulen || last_was_eoln) {
396                         /* first octet on a line is the uulen octet */
397                         uulen = CAMEL_UUDECODE_CHAR (*inptr);
398                         last_was_eoln = FALSE;
399                         if (uulen == 0) {
400                                 *state |= CAMEL_UUDECODE_STATE_END;
401                                 break;
402                         }
403
404                         inptr++;
405                         continue;
406                 }
407
408                 ch = *inptr++;
409
410                 if (uulen > 0) {
411                         /* save the byte */
412                         saved = (saved << 8) | ch;
413                         i++;
414                         if (i == 4) {
415                                 /* convert 4 uuencoded bytes to 3 normal bytes */
416                                 guchar b0, b1, b2, b3;
417
418                                 b0 = saved >> 24;
419                                 b1 = saved >> 16 & 0xff;
420                                 b2 = saved >> 8 & 0xff;
421                                 b3 = saved & 0xff;
422
423                                 if (uulen >= 3) {
424                                         *outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
425                                         *outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
426                                         *outptr++ = CAMEL_UUDECODE_CHAR (b2) << 6 | CAMEL_UUDECODE_CHAR (b3);
427                                         uulen -= 3;
428                                 } else {
429                                         if (uulen >= 1) {
430                                                 *outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
431                                                 uulen--;
432                                         }
433
434                                         if (uulen >= 2) {
435                                                 *outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
436                                                 uulen--;
437                                         }
438                                 }
439
440                                 i = 0;
441                                 saved = 0;
442                         }
443                 } else {
444                         break;
445                 }
446         }
447
448         *save = saved;
449         *state = (*state & CAMEL_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
450
451         return outptr - out;
452 }
453
454 /**
455  * camel_quoted_encode_close:
456  * @in: input stream
457  * @len: length of the input
458  * @out: output string
459  * @state: holds the number of bits that are stored in @save
460  * @save: leftover bits that have not yet been encoded
461  *
462  * Quoted-printable encodes a block of text. Call this when finished
463  * encoding data with camel_quoted_encode_step() to flush off
464  * the last little bit.
465  *
466  * Returns: the number of bytes encoded
467  **/
468 gsize
469 camel_quoted_encode_close (guchar *in,
470                            gsize len,
471                            guchar *out,
472                            gint *state,
473                            gint *save)
474 {
475         register guchar *outptr = out;
476         gint last;
477
478         if (len > 0)
479                 outptr += camel_quoted_encode_step (in, len, outptr, state, save);
480
481         last = *state;
482         if (last != -1) {
483                 /* space/tab must be encoded if it's the last character on
484                  * the line */
485                 if (camel_mime_is_qpsafe (last) && last != ' ' && last != 9) {
486                         *outptr++ = last;
487                 } else {
488                         *outptr++ = '=';
489                         *outptr++ = tohex[(last>>4) & 0xf];
490                         *outptr++ = tohex[last & 0xf];
491                 }
492         }
493
494         *save = 0;
495         *state = -1;
496
497         return outptr - out;
498 }
499
500 /**
501  * camel_quoted_encode_step:
502  * @in: input stream
503  * @len: length of the input
504  * @out: output string
505  * @state: holds the number of bits that are stored in @save
506  * @save: leftover bits that have not yet been encoded
507  *
508  * Quoted-printable encodes a block of text. Performs an 'encode
509  * step', saves left-over state in state and save (initialise to -1 on
510  * first invocation).
511  *
512  * Returns: the number of bytes encoded
513  **/
514 gsize
515 camel_quoted_encode_step (guchar *in,
516                           gsize len,
517                           guchar *out,
518                           gint *statep,
519                           gint *save)
520 {
521         register guchar *inptr, *outptr, *inend;
522         guchar c;
523         register gint sofar = *save;  /* keeps track of how many chars on a line */
524         register gint last = *statep; /* keeps track if last gchar to end was a space cr etc */
525
526         #define output_last()                           \
527                 if (sofar + 3 > 74) {                   \
528                         *outptr++ = '=';                \
529                         *outptr++ = '\n';               \
530                         sofar = 0;                      \
531                 }                                       \
532                 *outptr++ = '=';                        \
533                 *outptr++ = tohex[(last >> 4) & 0xf];   \
534                 *outptr++ = tohex[last & 0xf];          \
535                 sofar += 3;
536
537         inptr = in;
538         inend = in + len;
539         outptr = out;
540         while (inptr < inend) {
541                 c = *inptr++;
542                 if (c == '\r') {
543                         if (last != -1) {
544                                 output_last ();
545                         }
546                         last = c;
547                 } else if (c == '\n') {
548                         if (last != -1 && last != '\r') {
549                                 output_last ();
550                         }
551                         *outptr++ = '\n';
552                         sofar = 0;
553                         last = -1;
554                 } else {
555                         if (last != -1) {
556                                 if (camel_mime_is_qpsafe (last)) {
557                                         *outptr++ = last;
558                                         sofar++;
559                                 } else {
560                                         output_last ();
561                                 }
562                         }
563
564                         if (camel_mime_is_qpsafe (c)) {
565                                 if (sofar > 74) {
566                                         *outptr++ = '=';
567                                         *outptr++ = '\n';
568                                         sofar = 0;
569                                 }
570
571                                 /* delay output of space gchar */
572                                 if (c == ' ' || c == '\t') {
573                                         last = c;
574                                 } else {
575                                         *outptr++ = c;
576                                         sofar++;
577                                         last = -1;
578                                 }
579                         } else {
580                                 if (sofar > 72) {
581                                         *outptr++ = '=';
582                                         *outptr++ = '\n';
583                                         sofar = 3;
584                                 } else
585                                         sofar += 3;
586
587                                 *outptr++ = '=';
588                                 *outptr++ = tohex[(c >> 4) & 0xf];
589                                 *outptr++ = tohex[c & 0xf];
590                                 last = -1;
591                         }
592                 }
593         }
594         *save = sofar;
595         *statep = last;
596
597         #undef output_last
598
599         return (outptr - out);
600 }
601
602 /*
603  * FIXME: this does not strip trailing spaces from lines (as it should, rfc 2045, section 6.7)
604  * Should it also canonicalise the end of line to CR LF??
605  *
606  * Note: Trailing rubbish (at the end of input), like = or =x or =\r will be lost.
607  */
608
609 /**
610  * camel_quoted_decode_step:
611  * @in: input stream
612  * @len: max length of data to decode
613  * @out: output stream
614  * @savestate: holds the number of bits that are stored in @save
615  * @saveme: leftover bits that have not yet been decoded
616  *
617  * Decodes a block of quoted-printable encoded data. Performs a
618  * 'decode step' on a chunk of QP encoded data.
619  *
620  * Returns: the number of bytes decoded
621  **/
622 gsize
623 camel_quoted_decode_step (guchar *in,
624                           gsize len,
625                           guchar *out,
626                           gint *savestate,
627                           gint *saveme)
628 {
629         register guchar *inptr, *outptr;
630         guchar *inend, c;
631         gint state, save;
632
633         inend = in + len;
634         outptr = out;
635
636         d (printf ("quoted-printable, decoding text '%.*s'\n", len, in));
637
638         state = *savestate;
639         save = *saveme;
640         inptr = in;
641         while (inptr < inend) {
642                 switch (state) {
643                 case 0:
644                         while (inptr < inend) {
645                                 c = *inptr++;
646                                 if (c == '=') {
647                                         state = 1;
648                                         break;
649                                 }
650 #ifdef CANONICALISE_EOL
651                                 /*else if (c=='\r') {
652                                         state = 3;
653                                 } else if (c == '\n') {
654                                         *outptr++ = '\r';
655                                         *outptr++ = c;
656                                         } */
657 #endif
658                                 else {
659                                         *outptr++ = c;
660                                 }
661                         }
662                         break;
663                 case 1:
664                         c = *inptr++;
665                         if (c == '\n') {
666                                 /* soft break ... unix end of line */
667                                 state = 0;
668                         } else {
669                                 save = c;
670                                 state = 2;
671                         }
672                         break;
673                 case 2:
674                         c = *inptr++;
675                         if (isxdigit (c) && isxdigit (save)) {
676                                 c = toupper (c);
677                                 save = toupper (save);
678                                 *outptr++ = (((save>='A'?save-'A'+10:save-'0')&0x0f) << 4)
679                                         | ((c >= 'A' ? c - 'A' + 10 : c - '0') &0x0f);
680                         } else if (c == '\n' && save == '\r') {
681                                 /* soft break ... canonical end of line */
682                         } else {
683                                 /* just output the data */
684                                 *outptr++ = '=';
685                                 *outptr++ = save;
686                                 *outptr++ = c;
687                         }
688                         state = 0;
689                         break;
690 #ifdef CANONICALISE_EOL
691                 case 3:
692                         /* convert \r -> to \r\n, leaves \r\n alone */
693                         c = *inptr++;
694                         if (c == '\n') {
695                                 *outptr++ = '\r';
696                                 *outptr++ = c;
697                         } else {
698                                 *outptr++ = '\r';
699                                 *outptr++ = '\n';
700                                 *outptr++ = c;
701                         }
702                         state = 0;
703                         break;
704 #endif
705                 }
706         }
707
708         *savestate = state;
709         *saveme = save;
710
711         return outptr - out;
712 }
713
714 /*
715  * this is for the "Q" encoding of international words,
716  * which is slightly different than plain quoted-printable (mainly by allowing 0x20 <> _)
717 */
718 static gsize
719 quoted_decode (const guchar *in,
720                gsize len,
721                guchar *out)
722 {
723         register const guchar *inptr;
724         register guchar *outptr;
725         const guchar *inend;
726         guchar c, c1;
727         gint ret = 0;
728
729         inend = in + len;
730         outptr = out;
731
732         d (printf ("decoding text '%.*s'\n", len, in));
733
734         inptr = in;
735         while (inptr < inend) {
736                 c = *inptr++;
737                 if (c == '=') {
738                         /* silently ignore truncated data? */
739                         if (inend - in >= 2) {
740                                 c = toupper (*inptr++);
741                                 c1 = toupper (*inptr++);
742                                 *outptr++ = (((c>='A'?c-'A'+10:c-'0')&0x0f) << 4)
743                                         | ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') &0x0f);
744                         } else {
745                                 ret = -1;
746                                 break;
747                         }
748                 } else if (c == '_') {
749                         *outptr++ = 0x20;
750                 } else {
751                         *outptr++ = c;
752                 }
753         }
754         if (ret == 0) {
755                 return outptr - out;
756         }
757         return 0;
758 }
759
760 /* rfc2047 version of quoted-printable */
761 /* safemask is the mask to apply to the camel_mime_special_table to determine what
762  * characters can safely be included without encoding */
763 static gsize
764 quoted_encode (const guchar *in,
765                gsize len,
766                guchar *out,
767                gushort safemask)
768 {
769         register const guchar *inptr, *inend;
770         guchar *outptr;
771         guchar c;
772
773         inptr = in;
774         inend = in + len;
775         outptr = out;
776         while (inptr < inend) {
777                 c = *inptr++;
778                 if (c == ' ') {
779                         *outptr++ = '_';
780                 } else if (camel_mime_special_table[c] & safemask) {
781                         *outptr++ = c;
782                 } else {
783                         *outptr++ = '=';
784                         *outptr++ = tohex[(c >> 4) & 0xf];
785                         *outptr++ = tohex[c & 0xf];
786                 }
787         }
788
789         d (printf ("encoding '%.*s' = '%.*s'\n", len, in, outptr - out, out));
790
791         return (outptr - out);
792 }
793
794 static void
795 header_decode_lwsp (const gchar **in)
796 {
797         const gchar *inptr = *in;
798         gchar c;
799
800         d2 (printf ("is ws: '%s'\n", *in));
801
802         while ((camel_mime_is_lwsp (*inptr) || *inptr =='(') && *inptr != '\0') {
803                 while (camel_mime_is_lwsp (*inptr) && *inptr != '\0') {
804                         d2 (printf ("(%c)", *inptr));
805                         inptr++;
806                 }
807                 d2 (printf ("\n"));
808
809                 /* check for comments */
810                 if (*inptr == '(') {
811                         gint depth = 1;
812                         inptr++;
813                         while (depth && (c=*inptr) && *inptr != '\0') {
814                                 if (c == '\\' && inptr[1]) {
815                                         inptr++;
816                                 } else if (c == '(') {
817                                         depth++;
818                                 } else if (c == ')') {
819                                         depth--;
820                                 }
821                                 inptr++;
822                         }
823                 }
824         }
825         *in = inptr;
826 }
827
828 static gchar *
829 camel_iconv_strndup (iconv_t cd,
830                      const gchar *string,
831                      gsize n)
832 {
833         gsize inleft, outleft, converted = 0;
834         gchar *out, *outbuf;
835         const gchar *inbuf;
836         gsize outlen;
837         gint errnosav;
838
839         if (cd == (iconv_t) -1)
840                 return g_strndup (string, n);
841
842         outlen = n * 2 + 16;
843         out = g_malloc (outlen + 4);
844
845         inbuf = string;
846         inleft = n;
847
848         do {
849                 errno = 0;
850                 outbuf = out + converted;
851                 outleft = outlen - converted;
852
853                 converted = iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
854                 if (converted == (gsize) -1) {
855                         if (errno != E2BIG && errno != EINVAL)
856                                 goto fail;
857                 }
858
859                 /*
860                  * E2BIG   There is not sufficient room at *outbuf.
861                  *
862                  * We just need to grow our outbuffer and try again.
863                  */
864
865                 converted = outbuf - out;
866                 if (errno == E2BIG) {
867                         outlen += inleft * 2 + 16;
868                         out = g_realloc (out, outlen + 4);
869                         outbuf = out + converted;
870                 }
871         } while (errno == E2BIG && inleft > 0);
872
873         /*
874          * EINVAL  An  incomplete  multibyte sequence has been encoun­
875          *         tered in the input.
876          *
877          * We'll just have to ignore it...
878          */
879
880         /* flush the iconv conversion */
881         while (iconv (cd, NULL, NULL, &outbuf, &outleft) == (gsize) -1) {
882                 if (errno != E2BIG)
883                         break;
884
885                 outlen += 16;
886                 converted = outbuf - out;
887                 out = g_realloc (out, outlen + 4);
888                 outleft = outlen - converted;
889                 outbuf = out + converted;
890         }
891
892         /* Note: not all charsets can be nul-terminated with a single
893          * nul byte. UCS2, for example, needs 2 nul bytes and UCS4
894          * needs 4. I hope that 4 nul bytes is enough to terminate all
895          * multibyte charsets? */
896
897         /* nul-terminate the string */
898         memset (outbuf, 0, 4);
899
900         /* reset the cd */
901         iconv (cd, NULL, NULL, NULL, NULL);
902
903         return out;
904
905  fail:
906
907         errnosav = errno;
908
909         w (g_warning ("camel_iconv_strndup: %s at byte %lu", g_strerror (errno), n - inleft));
910
911         g_free (out);
912
913         /* reset the cd */
914         iconv (cd, NULL, NULL, NULL, NULL);
915
916         errno = errnosav;
917
918         return NULL;
919 }
920
/* true when the byte @c, read as an unsigned char, is in the 7bit US-ASCII range */
#define is_ascii(c) (((guchar) (c)) < 0x80)
922
923 static gchar *
924 decode_8bit (const gchar *text,
925              gsize len,
926              const gchar *default_charset)
927 {
928         const gchar *charsets[4] = { "UTF-8", NULL, NULL, NULL };
929         gsize inleft, outleft, outlen, rc, min, n;
930         const gchar *locale_charset, *best;
931         gchar *out, *outbuf;
932         const gchar *inbuf;
933         iconv_t cd;
934         gint i = 1;
935
936         if (default_charset && g_ascii_strcasecmp (default_charset, "UTF-8") != 0)
937                 charsets[i++] = default_charset;
938
939         locale_charset = camel_iconv_locale_charset ();
940         if (locale_charset && g_ascii_strcasecmp (locale_charset, "UTF-8") != 0)
941                 charsets[i++] = locale_charset;
942
943         min = len;
944         best = charsets[0];
945
946         outlen = (len * 2) + 16;
947         out = g_malloc (outlen + 1);
948
949         for (i = 0; charsets[i]; i++) {
950                 if ((cd = camel_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1)
951                         continue;
952
953                 outleft = outlen;
954                 outbuf = out;
955                 inleft = len;
956                 inbuf = text;
957                 n = 0;
958
959                 do {
960                         rc = iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
961                         if (rc == (gsize) -1) {
962                                 if (errno == EINVAL) {
963                                         /* incomplete sequence at the end of the input buffer */
964                                         n += inleft;
965                                         break;
966                                 }
967
968                                 if (errno == E2BIG) {
969                                         outlen += (inleft * 2) + 16;
970                                         rc = (gsize) (outbuf - out);
971                                         out = g_realloc (out, outlen + 1);
972                                         outleft = outlen - rc;
973                                         outbuf = out + rc;
974                                 } else {
975                                         inleft--;
976                                         inbuf++;
977                                         n++;
978                                 }
979                         }
980                 } while (inleft > 0);
981
982                 while ((rc = iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
983                         if (errno != E2BIG)
984                                 break;
985
986                         outlen += 16;
987                         rc = (gsize) (outbuf - out);
988                         out = g_realloc (out, outlen + 1);
989                         outleft = outlen - rc;
990                         outbuf = out + rc;
991                 }
992
993                 *outbuf = '\0';
994
995                 camel_iconv_close (cd);
996
997                 if (rc != (gsize) -1 && n == 0)
998                         return out;
999
1000                 if (n < min) {
1001                         best = charsets[i];
1002                         min = n;
1003                 }
1004         }
1005
1006         /* if we get here, then none of the charsets fit the 8bit text flawlessly...
1007          * try to find the one that fit the best and use that to convert what we can,
1008          * replacing any byte we can't convert with a '?' */
1009
1010         if ((cd = camel_iconv_open ("UTF-8", best)) == (iconv_t) -1) {
1011                 /* this shouldn't happen... but if we are here, then
1012                  * it did...  the only thing we can do at this point
1013                  * is replace the 8bit garbage and pray */
1014                 register const gchar *inptr = text;
1015                 const gchar *inend = inptr + len;
1016
1017                 outbuf = out;
1018
1019                 while (inptr < inend) {
1020                         if (is_ascii (*inptr))
1021                                 *outbuf++ = *inptr++;
1022                         else
1023                                 *outbuf++ = '?';
1024                 }
1025
1026                 *outbuf = '\0';
1027
1028                 return out;
1029         }
1030
1031         outleft = outlen;
1032         outbuf = out;
1033         inleft = len;
1034         inbuf = text;
1035
1036         do {
1037                 rc = iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
1038                 if (rc == (gsize) -1) {
1039                         if (errno == EINVAL) {
1040                                 /* incomplete sequence at the end of the input buffer */
1041                                 break;
1042                         }
1043
1044                         if (errno == E2BIG) {
1045                                 rc = outbuf - out;
1046                                 outlen += inleft * 2 + 16;
1047                                 out = g_realloc (out, outlen + 1);
1048                                 outleft = outlen - rc;
1049                                 outbuf = out + rc;
1050                         } else {
1051                                 *outbuf++ = '?';
1052                                 outleft--;
1053                                 inleft--;
1054                                 inbuf++;
1055                         }
1056                 }
1057         } while (inleft > 0);
1058
1059         while ((rc = iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
1060                 if (errno != E2BIG)
1061                         break;
1062
1063                 outlen += 16;
1064                 rc = (gsize) (outbuf - out);
1065                 out = g_realloc (out, outlen + 1);
1066                 outleft = outlen - rc;
1067                 outbuf = out + rc;
1068         }
1069
1070         *outbuf = '\0';
1071
1072         camel_iconv_close (cd);
1073
1074         return out;
1075 }
1076
/* true when the @len bytes at @atom are at least 7 bytes long, start with
 * "=?" and end with "?="; both arguments are evaluated more than once */
#define is_rfc2047_encoded_word(atom, len) ((len) >= 7 && !strncmp ((atom), "=?", 2) && !strncmp ((atom) + (len) - 2, "?=", 2))
1078
1079 /* decode an rfc2047 encoded-word token */
1080 static gchar *
1081 rfc2047_decode_word (const gchar *in,
1082                      gsize inlen,
1083                      const gchar *default_charset)
1084 {
1085         const guchar *instart = (const guchar *) in;
1086         const guchar *inptr = instart + 2;
1087         const guchar *inend = instart + inlen - 2;
1088         guchar *decoded;
1089         const gchar *charset;
1090         gchar *charenc, *p;
1091         guint32 save = 0;
1092         gssize declen;
1093         gint state = 0;
1094         gsize len;
1095         iconv_t cd;
1096         gchar *buf;
1097
1098         /* skip over the charset */
1099         if (inlen < 8 || !(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
1100                 return NULL;
1101
1102         inptr++;
1103
1104         switch (*inptr) {
1105         case 'B':
1106         case 'b':
1107                 inptr += 2;
1108                 decoded = g_alloca (inend - inptr);
1109                 declen = g_base64_decode_step ((gchar *) inptr, inend - inptr, decoded, &state, &save);
1110                 break;
1111         case 'Q':
1112         case 'q':
1113                 inptr += 2;
1114                 decoded = g_alloca (inend - inptr);
1115                 declen = quoted_decode (inptr, inend - inptr, decoded);
1116
1117                 if (declen == -1) {
1118                         d (fprintf (stderr, "encountered broken 'Q' encoding\n"));
1119                         return NULL;
1120                 }
1121                 break;
1122         default:
1123                 d (fprintf (stderr, "unknown encoding\n"));
1124                 return NULL;
1125         }
1126
1127         /* never return empty string, return rather NULL */
1128         if (!declen)
1129                 return NULL;
1130
1131         len = (inptr - 3) - (instart + 2);
1132         charenc = g_alloca (len + 1);
1133         memcpy (charenc, in + 2, len);
1134         charenc[len] = '\0';
1135         charset = charenc;
1136
1137         /* rfc2231 updates rfc2047 encoded words...
1138          * The ABNF given in RFC 2047 for encoded-words is:
1139          *   encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
1140          * This specification changes this ABNF to:
1141          *   encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
1142          */
1143
1144         /* trim off the 'language' part if it's there... */
1145         if ((p = strchr (charset, '*')))
1146                 *p = '\0';
1147
1148         /* slight optimization? */
1149         if (!g_ascii_strcasecmp (charset, "UTF-8")) {
1150                 p = (gchar *) decoded;
1151                 len = declen;
1152
1153                 while (!g_utf8_validate (p, len, (const gchar **) &p)) {
1154                         len = declen - (p - (gchar *) decoded);
1155                         *p = '?';
1156                 }
1157
1158                 return g_strndup ((gchar *) decoded, declen);
1159         }
1160
1161         if (charset[0])
1162                 charset = camel_iconv_charset_name (charset);
1163
1164         if (!charset[0] || (cd = camel_iconv_open ("UTF-8", charset)) == (iconv_t) -1) {
1165                 w (g_warning (
1166                         "Cannot convert from %s to UTF-8, "
1167                         "header display may be corrupt: %s",
1168                         charset[0] ? charset : "unspecified charset",
1169                         g_strerror (errno)));
1170
1171                 return decode_8bit ((gchar *) decoded, declen, default_charset);
1172         }
1173
1174         buf = camel_iconv_strndup (cd, (gchar *) decoded, declen);
1175         camel_iconv_close (cd);
1176
1177         if (buf != NULL)
1178                 return buf;
1179
1180         w (g_warning (
1181                 "Failed to convert \"%.*s\" to UTF-8, display may be "
1182                 "corrupt: %s", declen, decoded, g_strerror (errno)));
1183
1184         return decode_8bit ((gchar *) decoded, declen, charset);
1185 }
1186
1187 /* ok, a lot of mailers are BROKEN, and send iso-latin1 encoded
1188  * headers, when they should just be sticking to US-ASCII
1189  * according to the rfc's.  Anyway, since the conversion to utf-8
1190  * is trivial, just do it here without iconv */
1191 static GString *
1192 append_latin1 (GString *out,
1193                const gchar *in,
1194                gsize len)
1195 {
1196         guint c;
1197
1198         while (len) {
1199                 c = (guint) * in++;
1200                 len--;
1201                 if (c & 0x80) {
1202                         out = g_string_append_c (out, 0xc0 | ((c >> 6) & 0x3));  /* 110000xx */
1203                         out = g_string_append_c (out, 0x80 | (c & 0x3f));        /* 10xxxxxx */
1204                 } else {
1205                         out = g_string_append_c (out, c);
1206                 }
1207         }
1208         return out;
1209 }
1210
1211 static gint
1212 append_8bit (GString *out,
1213              const gchar *inbuf,
1214              gsize inlen,
1215              const gchar *charset)
1216 {
1217         gchar *outbase, *outbuf;
1218         gsize outlen;
1219         iconv_t ic;
1220
1221         ic = camel_iconv_open ("UTF-8", charset);
1222         if (ic == (iconv_t) -1)
1223                 return FALSE;
1224
1225         outlen = inlen * 6 + 16;
1226         outbuf = outbase = g_malloc (outlen);
1227
1228         if (camel_iconv (ic, &inbuf, &inlen, &outbuf, &outlen) == (gsize) -1) {
1229                 w (g_warning ("Conversion to '%s' failed: %s", charset, g_strerror (errno)));
1230                 g_free (outbase);
1231                 camel_iconv_close (ic);
1232                 return FALSE;
1233         }
1234
1235         camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
1236
1237         *outbuf = 0;
1238         g_string_append (out, outbase);
1239         g_free (outbase);
1240         camel_iconv_close (ic);
1241
1242         return TRUE;
1243
1244 }
1245
1246 static GString *
1247 append_quoted_pair (GString *str,
1248                     const gchar *in,
1249                     gsize inlen)
1250 {
1251         register const gchar *inptr = in;
1252         const gchar *inend = in + inlen;
1253         gchar c;
1254
1255         while (inptr < inend) {
1256                 c = *inptr++;
1257                 if (c == '\\' && inptr < inend)
1258                         g_string_append_c (str, *inptr++);
1259                 else
1260                         g_string_append_c (str, c);
1261         }
1262
1263         return str;
1264 }
1265
1266 /* decodes a simple text, rfc822 + rfc2047 */
1267 static gchar *
1268 header_decode_text (const gchar *in,
1269                     gint ctext,
1270                     const gchar *default_charset)
1271 {
1272         register const gchar *inptr = in;
1273         gboolean encoded = FALSE;
1274         const gchar *lwsp, *text;
1275         gsize nlwsp, n;
1276         gboolean ascii;
1277         gchar *decoded;
1278         GString *out;
1279
1280         if (in == NULL)
1281                 return g_strdup ("");
1282
1283         out = g_string_sized_new (strlen (in) + 1);
1284
1285         while (*inptr != '\0') {
1286                 lwsp = inptr;
1287                 while (camel_mime_is_lwsp (*inptr))
1288                         inptr++;
1289
1290                 nlwsp = (gsize) (inptr - lwsp);
1291
1292                 if (*inptr != '\0') {
1293                         text = inptr;
1294                         ascii = TRUE;
1295
1296                         if (!strncmp (inptr, "=?", 2)) {
1297                                 inptr += 2;
1298
1299                                 /* skip past the charset (if one is even declared, sigh) */
1300                                 while (*inptr && *inptr != '?') {
1301                                         ascii = ascii && is_ascii (*inptr);
1302                                         inptr++;
1303                                 }
1304
1305                                 /* sanity check encoding type */
1306                                 if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
1307                                         goto non_rfc2047;
1308
1309                                 inptr += 3;
1310
1311                                 /* find the end of the rfc2047 encoded word token */
1312                                 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
1313                                         ascii = ascii && is_ascii (*inptr);
1314                                         inptr++;
1315                                 }
1316
1317                                 if (!strncmp (inptr, "?=", 2))
1318                                         inptr += 2;
1319                         } else {
1320                         non_rfc2047:
1321                                 /* stop if we encounter a possible rfc2047 encoded
1322                                  * token even if it's inside another word, sigh. */
1323                                 while (*inptr && !camel_mime_is_lwsp (*inptr) &&
1324                                        strncmp (inptr, "=?", 2) != 0) {
1325                                         ascii = ascii && is_ascii (*inptr);
1326                                         inptr++;
1327                                 }
1328                         }
1329
1330                         n = (gsize) (inptr - text);
1331                         if (is_rfc2047_encoded_word (text, n)) {
1332                                 if ((decoded = rfc2047_decode_word (text, n, default_charset))) {
1333                                         /* rfc2047 states that you must ignore all
1334                                          * whitespace between encoded words */
1335                                         if (!encoded)
1336                                                 g_string_append_len (out, lwsp, nlwsp);
1337
1338                                         g_string_append (out, decoded);
1339                                         g_free (decoded);
1340
1341                                         encoded = TRUE;
1342                                 } else {
1343                                         /* append lwsp and invalid rfc2047 encoded-word token */
1344                                         g_string_append_len (out, lwsp, nlwsp + n);
1345                                         encoded = FALSE;
1346                                 }
1347                         } else {
1348                                 /* append lwsp */
1349                                 g_string_append_len (out, lwsp, nlwsp);
1350
1351                                 /* append word token */
1352                                 if (!ascii) {
1353                                         /* *sigh* I hate broken mailers... */
1354                                         decoded = decode_8bit (text, n, default_charset);
1355                                         n = strlen (decoded);
1356                                         text = decoded;
1357                                 } else {
1358                                         decoded = NULL;
1359                                 }
1360
1361                                 if (!ctext)
1362                                         g_string_append_len (out, text, n);
1363                                 else
1364                                         append_quoted_pair (out, text, n);
1365
1366                                 g_free (decoded);
1367
1368                                 encoded = FALSE;
1369                         }
1370                 } else {
1371                         /* appending trailing lwsp */
1372                         g_string_append_len (out, lwsp, nlwsp);
1373                         break;
1374                 }
1375         }
1376
1377         decoded = out->str;
1378         g_string_free (out, FALSE);
1379
1380         return decoded;
1381 }
1382
1383 /**
1384  * camel_header_decode_string:
1385  * @in: input header value string
1386  * @default_charset: default charset to use if improperly encoded
1387  *
1388  * Decodes rfc2047 encoded-word tokens
1389  *
1390  * Returns: a string containing the UTF-8 version of the decoded header
1391  * value
1392  **/
1393 gchar *
1394 camel_header_decode_string (const gchar *in,
1395                             const gchar *default_charset)
1396 {
1397         if (in == NULL)
1398                 return NULL;
1399
1400         return header_decode_text (in, FALSE, default_charset);
1401 }
1402
1403 /**
1404  * camel_header_format_ctext:
1405  * @in: input header value string
1406  * @default_charset: default charset to use if improperly encoded
1407  *
1408  * Decodes a header which contains rfc2047 encoded-word tokens that
1409  * may or may not be within a comment.
1410  *
1411  * Returns: a string containing the UTF-8 version of the decoded header
1412  * value
1413  **/
1414 gchar *
1415 camel_header_format_ctext (const gchar *in,
1416                            const gchar *default_charset)
1417 {
1418         if (in == NULL)
1419                 return NULL;
1420
1421         return header_decode_text (in, TRUE, default_charset);
1422 }
1423
/* Upper bound, in input bytes, on the chunk handed to the charset converter
 * at a time, so that each resulting encoded-word has a chance of fitting
 * into a properly folded line.  Only a guide. */
#define CAMEL_FOLD_PREENCODED 24
1427
1428 /* FIXME: needs a way to cache iconv opens for different charsets? */
1429 static void
1430 rfc2047_encode_word (GString *outstring,
1431                      const gchar *in,
1432                      gsize len,
1433                      const gchar *type,
1434                      gushort safemask)
1435 {
1436         iconv_t ic = (iconv_t) -1;
1437         gchar *buffer, *out, *ascii;
1438         gsize inlen, outlen, enclen, bufflen;
1439         const gchar *inptr, *p;
1440         gint first = 1;
1441
1442         d (printf ("Converting [%d] '%.*s' to %s\n", len, len, in, type));
1443
1444         /* convert utf8->encoding */
1445         bufflen = len * 6 + 16;
1446         buffer = g_alloca (bufflen);
1447         inlen = len;
1448         inptr = in;
1449
1450         ascii = g_alloca (bufflen);
1451
1452         if (g_ascii_strcasecmp (type, "UTF-8") != 0)
1453                 ic = camel_iconv_open (type, "UTF-8");
1454
1455         while (inlen) {
1456                 gssize convlen, proclen;
1457                 gint i;
1458
1459                 /* break up words into smaller bits, what we really want is encoded + overhead < 75,
1460                  * but we'll just guess what that means in terms of input chars, and assume its good enough */
1461
1462                 out = buffer;
1463                 outlen = bufflen;
1464
1465                 if (ic == (iconv_t) -1) {
1466                         /* native encoding case, the easy one (?) */
1467                         /* we work out how much we can convert, and still be in length */
1468                         /* proclen will be the result of input characters that we can convert, to the nearest
1469                          * (approximated) valid utf8 gchar */
1470                         convlen = 0;
1471                         proclen = -1;
1472                         p = inptr;
1473                         i = 0;
1474                         while (p < (in + len) && convlen < (75 - strlen ("=?utf-8?q?\?="))) {
1475                                 guchar c = *p++;
1476
1477                                 if (c >= 0xc0)
1478                                         proclen = i;
1479                                 i++;
1480                                 if (c < 0x80)
1481                                         proclen = i;
1482                                 if (camel_mime_special_table[c] & safemask)
1483                                         convlen += 1;
1484                                 else
1485                                         convlen += 3;
1486                         }
1487
1488                         if (proclen >= 0 && proclen < i && convlen < (75 - strlen ("=?utf-8?q?\?=")))
1489                                 proclen = i;
1490
1491                         /* well, we probably have broken utf8, just copy it anyway what the heck */
1492                         if (proclen == -1) {
1493                                 w (g_warning ("Appear to have truncated utf8 sequence"));
1494                                 proclen = inlen;
1495                         }
1496
1497                         memcpy (out, inptr, proclen);
1498                         inptr += proclen;
1499                         inlen -= proclen;
1500                         out += proclen;
1501                 } else {
1502                         /* well we could do similar, but we can't (without undue effort), we'll just break it up into
1503                          * hopefully-small-enough chunks, and leave it at that */
1504                         convlen = MIN (inlen, CAMEL_FOLD_PREENCODED);
1505                         p = inptr;
1506                         if (camel_iconv (ic, &inptr, (gsize *) &convlen, &out, &outlen) == (gsize) -1 && errno != EINVAL) {
1507                                 w (g_warning ("Conversion problem: conversion truncated: %s", g_strerror (errno)));
1508                                 /* blah, we include it anyway, better than infinite loop ... */
1509                                 inptr += convlen;
1510                         } else {
1511                                 /* make sure we flush out any shift state */
1512                                 camel_iconv (ic, NULL, NULL, &out, &outlen);
1513                         }
1514                         inlen -= (inptr - p);
1515                 }
1516
1517                 enclen = out - buffer;
1518
1519                 if (enclen) {
1520                         /* create token */
1521                         out = ascii;
1522                         if (first)
1523                                 first = 0;
1524                         else
1525                                 *out++ = ' ';
1526                         out += sprintf (out, "=?%s?Q?", type);
1527                         out += quoted_encode ((guchar *) buffer, enclen, (guchar *) out, safemask);
1528                         sprintf (out, "?=");
1529
1530                         d (printf ("converted part = %s\n", ascii));
1531
1532                         g_string_append (outstring, ascii);
1533                 }
1534         }
1535
1536         if (ic != (iconv_t) -1)
1537                 camel_iconv_close (ic);
1538 }
1539
1540 static gchar *
1541 header_encode_string_rfc2047 (const guchar *in,
1542                               gboolean include_lwsp)
1543 {
1544         const guchar *inptr = in, *start, *word;
1545         gboolean last_was_encoded = FALSE;
1546         gboolean last_was_space = FALSE;
1547         const gchar *charset;
1548         gint encoding;
1549         GString *out;
1550         gchar *outstr;
1551
1552         g_return_val_if_fail (g_utf8_validate ((const gchar *) in, -1, NULL), NULL);
1553
1554         if (in == NULL)
1555                 return NULL;
1556
1557         /* do a quick us-ascii check (the common case?) */
1558         while (*inptr) {
1559                 if (*inptr > 127)
1560                         break;
1561                 inptr++;
1562         }
1563         if (*inptr == '\0')
1564                 return g_strdup ((gchar *) in);
1565
1566         /* This gets each word out of the input, and checks to see what charset
1567          * can be used to encode it. */
1568         /* TODO: Work out when to merge subsequent words, or across word-parts */
1569         out = g_string_new ("");
1570         inptr = in;
1571         encoding = 0;
1572         word = NULL;
1573         start = inptr;
1574         while (inptr && *inptr) {
1575                 gunichar c;
1576                 const gchar *newinptr;
1577
1578                 newinptr = g_utf8_next_char (inptr);
1579                 c = g_utf8_get_char ((gchar *) inptr);
1580                 if (newinptr == NULL || !g_unichar_validate (c)) {
1581                         w (g_warning (
1582                                 "Invalid UTF-8 sequence encountered "
1583                                 "(pos %d, gchar '%c'): %s",
1584                                 (inptr - in), inptr[0], in));
1585                         inptr++;
1586                         continue;
1587                 }
1588
1589                 if (c < 256 && !include_lwsp && camel_mime_is_lwsp (c) && !last_was_space) {
1590                         /* we've reached the end of a 'word' */
1591                         if (word && !(last_was_encoded && encoding)) {
1592                                 /* output lwsp between non-encoded words */
1593                                 g_string_append_len (out, (const gchar *) start, word - start);
1594                                 start = word;
1595                         }
1596
1597                         switch (encoding) {
1598                         case 0:
1599                                 g_string_append_len (out, (const gchar *) start, inptr - start);
1600                                 last_was_encoded = FALSE;
1601                                 break;
1602                         case 1:
1603                                 if (last_was_encoded)
1604                                         g_string_append_c (out, ' ');
1605
1606                                 rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1607                                 last_was_encoded = TRUE;
1608                                 break;
1609                         case 2:
1610                                 if (last_was_encoded)
1611                                         g_string_append_c (out, ' ');
1612
1613                                 if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1614                                         charset = "UTF-8";
1615                                 rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1616                                 last_was_encoded = TRUE;
1617                                 break;
1618                         }
1619
1620                         last_was_space = TRUE;
1621                         start = inptr;
1622                         word = NULL;
1623                         encoding = 0;
1624                 } else if (c > 127 && c < 256) {
1625                         encoding = MAX (encoding, 1);
1626                         last_was_space = FALSE;
1627                 } else if (c >= 256) {
1628                         encoding = MAX (encoding, 2);
1629                         last_was_space = FALSE;
1630                 } else if (include_lwsp || !camel_mime_is_lwsp (c)) {
1631                         last_was_space = FALSE;
1632                 }
1633
1634                 if (!(c < 256 && !include_lwsp && camel_mime_is_lwsp (c)) && !word)
1635                         word = inptr;
1636
1637                 inptr = (const guchar *) newinptr;
1638         }
1639
1640         if (inptr - start) {
1641                 if (word && !(last_was_encoded && encoding)) {
1642                         g_string_append_len (out, (const gchar *) start, word - start);
1643                         start = word;
1644                 }
1645
1646                 switch (encoding) {
1647                 case 0:
1648                         g_string_append_len (out, (const gchar *) start, inptr - start);
1649                         break;
1650                 case 1:
1651                         if (last_was_encoded)
1652                                 g_string_append_c (out, ' ');
1653
1654                         rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1655                         break;
1656                 case 2:
1657                         if (last_was_encoded)
1658                                 g_string_append_c (out, ' ');
1659
1660                         if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1661                                 charset = "UTF-8";
1662                         rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1663                         break;
1664                 }
1665         }
1666
1667         outstr = out->str;
1668         g_string_free (out, FALSE);
1669
1670         return outstr;
1671 }
1672
1673 /* TODO: Should this worry about quotes?? */
1674 /**
1675  * camel_header_encode_string:
1676  * @in: input string
1677  *
1678  * Encodes a 'text' header according to the rules of rfc2047.
1679  *
1680  * Returns: the rfc2047 encoded header
1681  **/
1682 gchar *
1683 camel_header_encode_string (const guchar *in)
1684 {
1685         return header_encode_string_rfc2047 (in, FALSE);
1686 }
1687
1688 /* apply quoted-string rules to a string */
1689 static void
1690 quote_word (GString *out,
1691             gboolean do_quotes,
1692             const gchar *start,
1693             gsize len)
1694 {
1695         gint i, c;
1696
1697         /* TODO: What about folding on long lines? */
1698         if (do_quotes)
1699                 g_string_append_c (out, '"');
1700         for (i = 0; i < len; i++) {
1701                 c = *start++;
1702                 if (c == '\"' || c == '\\' || c == '\r')
1703                         g_string_append_c (out, '\\');
1704                 g_string_append_c (out, c);
1705         }
1706         if (do_quotes)
1707                 g_string_append_c (out, '"');
1708 }
1709
/* Word classes in increasing order; the ordering matters because the
 * phrase encoder combines classes with MAX(). */
enum _phrase_word_t {
	WORD_ATOM = 0,		/* can be written out as-is */
	WORD_QSTRING = 1,	/* must be written as a quoted-string */
	WORD_2047 = 2		/* must be written as an rfc2047 encoded-word */
};
1716
1717 struct _phrase_word {
1718         const guchar *start, *end;
1719         enum _phrase_word_t type;
1720         gint encoding;
1721 };
1722
1723 static gboolean
1724 word_types_compatable (enum _phrase_word_t type1,
1725                        enum _phrase_word_t type2)
1726 {
1727         switch (type1) {
1728         case WORD_ATOM:
1729                 return type2 == WORD_QSTRING;
1730         case WORD_QSTRING:
1731                 return type2 != WORD_2047;
1732         case WORD_2047:
1733                 return type2 == WORD_2047;
1734         default:
1735                 return FALSE;
1736         }
1737 }
1738
1739 /* split the input into words with info about each word
1740  * merge common word types clean up */
1741 static GList *
1742 header_encode_phrase_get_words (const guchar *in)
1743 {
1744         const guchar *inptr = in, *start, *last;
1745         struct _phrase_word *word;
1746         enum _phrase_word_t type;
1747         gint encoding, count = 0;
1748         GList *words = NULL;
1749
1750         /* break the input into words */
1751         type = WORD_ATOM;
1752         last = inptr;
1753         start = inptr;
1754         encoding = 0;
1755         while (inptr && *inptr) {
1756                 gunichar c;
1757                 const gchar *newinptr;
1758
1759                 newinptr = g_utf8_next_char (inptr);
1760                 c = g_utf8_get_char ((gchar *) inptr);
1761
1762                 if (!g_unichar_validate (c)) {
1763                         w (g_warning (
1764                                 "Invalid UTF-8 sequence encountered "
1765                                 "(pos %d, gchar '%c'): %s",
1766                                 (inptr - in), inptr[0], in));
1767                         inptr++;
1768                         continue;
1769                 }
1770
1771                 inptr = (const guchar *) newinptr;
1772                 if (g_unichar_isspace (c)) {
1773                         if (count > 0) {
1774                                 word = g_new0 (struct _phrase_word, 1);
1775                                 word->start = start;
1776                                 word->end = last;
1777                                 word->type = type;
1778                                 word->encoding = encoding;
1779                                 words = g_list_append (words, word);
1780                                 count = 0;
1781                         }
1782
1783                         start = inptr;
1784                         type = WORD_ATOM;
1785                         encoding = 0;
1786                 } else {
1787                         count++;
1788                         if (c < 128) {
1789                                 if (!camel_mime_is_atom (c))
1790                                         type = MAX (type, WORD_QSTRING);
1791                         } else if (c > 127 && c < 256) {
1792                                 type = WORD_2047;
1793                                 encoding = MAX (encoding, 1);
1794                         } else if (c >= 256) {
1795                                 type = WORD_2047;
1796                                 encoding = MAX (encoding, 2);
1797                         }
1798                 }
1799
1800                 last = inptr;
1801         }
1802
1803         if (count > 0) {
1804                 word = g_new0 (struct _phrase_word, 1);
1805                 word->start = start;
1806                 word->end = last;
1807                 word->type = type;
1808                 word->encoding = encoding;
1809                 words = g_list_append (words, word);
1810         }
1811
1812         return words;
1813 }
1814
1815 #define MERGED_WORD_LT_FOLDLEN(wordlen, type) ((type) == WORD_2047 ? (wordlen) < CAMEL_FOLD_PREENCODED : (wordlen) < (CAMEL_FOLD_SIZE - 8))
1816
1817 static gboolean
1818 header_encode_phrase_merge_words (GList **wordsp)
1819 {
1820         GList *wordl, *nextl, *words = *wordsp;
1821         struct _phrase_word *word, *next;
1822         gboolean merged = FALSE;
1823
1824         /* scan the list, checking for words of similar types that can be merged */
1825         wordl = words;
1826         while (wordl) {
1827                 word = wordl->data;
1828                 nextl = g_list_next (wordl);
1829
1830                 while (nextl) {
1831                         next = nextl->data;
1832                         /* merge nodes of the same type AND we are not creating too long a string */
1833                         if (word_types_compatable (word->type, next->type)) {
1834                                 if (MERGED_WORD_LT_FOLDLEN (next->end - word->start, MAX (word->type, next->type))) {
1835                                         /* the resulting word type is the MAX of the 2 types */
1836                                         word->type = MAX (word->type, next->type);
1837                                         word->encoding = MAX (word->encoding, next->encoding);
1838                                         word->end = next->end;
1839                                         words = g_list_remove_link (words, nextl);
1840                                         g_list_free_1 (nextl);
1841                                         g_free (next);
1842
1843                                         nextl = g_list_next (wordl);
1844
1845                                         merged = TRUE;
1846                                 } else {
1847                                         /* if it is going to be too long, make sure we include the
1848                                          * separating whitespace */
1849                                         word->end = next->start;
1850                                         break;
1851                                 }
1852                         } else {
1853                                 break;
1854                         }
1855                 }
1856
1857                 wordl = g_list_next (wordl);
1858         }
1859
1860         *wordsp = words;
1861
1862         return merged;
1863 }
1864
1865 /* encodes a phrase sequence (different quoting/encoding rules to strings) */
1866 /**
1867  * camel_header_encode_phrase:
1868  * @in: header to encode
1869  *
1870  * Encodes a 'phrase' header according to the rules in rfc2047.
1871  *
1872  * Returns: the encoded 'phrase'
1873  **/
1874 gchar *
1875 camel_header_encode_phrase (const guchar *in)
1876 {
1877         struct _phrase_word *word = NULL, *last_word = NULL;
1878         GList *words, *wordl;
1879         const gchar *charset;
1880         GString *out;
1881         gchar *outstr;
1882
1883         if (in == NULL)
1884                 return NULL;
1885
1886         words = header_encode_phrase_get_words (in);
1887         if (!words)
1888                 return NULL;
1889
1890         while (header_encode_phrase_merge_words (&words))
1891                 ;
1892
1893         out = g_string_new ("");
1894
1895         /* output words now with spaces between them */
1896         wordl = words;
1897         while (wordl) {
1898                 const gchar *start;
1899                 gsize len;
1900
1901                 word = wordl->data;
1902
1903                 /* append correct number of spaces between words */
1904                 if (last_word && !(last_word->type == WORD_2047 && word->type == WORD_2047)) {
1905                         /* one or both of the words are not encoded so we write the spaces out untouched */
1906                         len = word->start - last_word->end;
1907                         out = g_string_append_len (out, (gchar *) last_word->end, len);
1908                 }
1909
1910                 switch (word->type) {
1911                 case WORD_ATOM:
1912                         out = g_string_append_len (out, (gchar *) word->start, word->end - word->start);
1913                         break;
1914                 case WORD_QSTRING:
1915                         quote_word (out, TRUE, (gchar *) word->start, word->end - word->start);
1916                         break;
1917                 case WORD_2047:
1918                         if (last_word && last_word->type == WORD_2047) {
1919                                 /* include the whitespace chars between these 2 words in the
1920                                  * resulting rfc2047 encoded word. */
1921                                 len = word->end - last_word->end;
1922                                 start = (const gchar *) last_word->end;
1923
1924                                 /* encoded words need to be separated by linear whitespace */
1925                                 g_string_append_c (out, ' ');
1926                         } else {
1927                                 len = word->end - word->start;
1928                                 start = (const gchar *) word->start;
1929                         }
1930
1931                         if (word->encoding == 1) {
1932                                 rfc2047_encode_word (out, start, len, "ISO-8859-1", CAMEL_MIME_IS_PSAFE);
1933                         } else {
1934                                 if (!(charset = camel_charset_best (start, len)))
1935                                         charset = "UTF-8";
1936                                 rfc2047_encode_word (out, start, len, charset, CAMEL_MIME_IS_PSAFE);
1937                         }
1938                         break;
1939                 }
1940
1941                 g_free (last_word);
1942                 wordl = g_list_next (wordl);
1943
1944                 last_word = word;
1945         }
1946
1947         /* and we no longer need the list */
1948         g_free (word);
1949         g_list_free (words);
1950
1951         outstr = out->str;
1952         g_string_free (out, FALSE);
1953
1954         return outstr;
1955 }
1956
1957 /* these are all internal parser functions */
1958
1959 static gchar *
1960 decode_token (const gchar **in)
1961 {
1962         const gchar *inptr = *in;
1963         const gchar *start;
1964
1965         header_decode_lwsp (&inptr);
1966         start = inptr;
1967         while (camel_mime_is_ttoken (*inptr))
1968                 inptr++;
1969         if (inptr > start) {
1970                 *in = inptr;
1971                 return g_strndup (start, inptr - start);
1972         } else {
1973                 return NULL;
1974         }
1975 }
1976
1977 /**
1978  * camel_header_token_decode:
1979  * @in: input string
1980  *
1981  * Gets the first token in the string according to the rules of
1982  * rfc0822.
1983  *
1984  * Returns: a new string containing the first token in @in
1985  **/
1986 gchar *
1987 camel_header_token_decode (const gchar *in)
1988 {
1989         if (in == NULL)
1990                 return NULL;
1991
1992         return decode_token (&in);
1993 }
1994
1995 /*
1996  * <"> * ( <any gchar except <"> \, cr  /  \ <any char> ) <">
1997 */
1998 static gchar *
1999 header_decode_quoted_string (const gchar **in)
2000 {
2001         const gchar *inptr = *in;
2002         gchar *out = NULL, *outptr;
2003         gsize outlen;
2004         gint c;
2005
2006         header_decode_lwsp (&inptr);
2007         if (*inptr == '"') {
2008                 const gchar *intmp;
2009                 gint skip = 0;
2010
2011                 /* first, calc length */
2012                 inptr++;
2013                 intmp = inptr;
2014                 while ( (c = *intmp++) && c!= '"') {
2015                         if (c == '\\' && *intmp) {
2016                                 intmp++;
2017                                 skip++;
2018                         }
2019                 }
2020                 outlen = intmp - inptr - skip;
2021                 out = outptr = g_malloc (outlen + 1);
2022                 while ( (c = *inptr) && c!= '"') {
2023                         inptr++;
2024                         if (c == '\\' && *inptr) {
2025                                 c = *inptr++;
2026                         }
2027                         *outptr++ = c;
2028                 }
2029                 if (c)
2030                         inptr++;
2031                 *outptr = '\0';
2032         }
2033         *in = inptr;
2034         return out;
2035 }
2036
2037 static gchar *
2038 header_decode_atom (const gchar **in)
2039 {
2040         const gchar *inptr = *in, *start;
2041
2042         header_decode_lwsp (&inptr);
2043         start = inptr;
2044         while (camel_mime_is_atom (*inptr))
2045                 inptr++;
2046         *in = inptr;
2047         if (inptr > start)
2048                 return g_strndup (start, inptr - start);
2049         else
2050                 return NULL;
2051 }
2052
2053 static gboolean
2054 extract_rfc2047_encoded_word (const gchar **in,
2055                               gchar **word)
2056 {
2057         const gchar *inptr = *in, *start;
2058
2059         header_decode_lwsp (&inptr);
2060         start = inptr;
2061
2062         if (!strncmp (inptr, "=?", 2)) {
2063                 inptr += 2;
2064
2065                 /* skip past the charset (if one is even declared, sigh) */
2066                 while (*inptr && *inptr != '?') {
2067                         inptr++;
2068                 }
2069
2070                 /* sanity check encoding type */
2071                 if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
2072                         return FALSE;
2073
2074                 inptr += 3;
2075
2076                 /* find the end of the rfc2047 encoded word token */
2077                 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
2078                         inptr++;
2079                 }
2080
2081                 if (!strncmp (inptr, "?=", 2)) {
2082                         inptr += 2;
2083
2084                         *in = inptr;
2085                         *word = g_strndup (start, inptr - start);
2086
2087                         return TRUE;
2088                 }
2089         }
2090
2091         return FALSE;
2092 }
2093
2094 static gchar *
2095 header_decode_word (const gchar **in)
2096 {
2097         const gchar *inptr = *in;
2098         gchar *word = NULL;
2099
2100         header_decode_lwsp (&inptr);
2101         *in = inptr;
2102
2103         if (*inptr == '"') {
2104                 return header_decode_quoted_string (in);
2105         } else if (*inptr == '=' && inptr[1] == '?' && extract_rfc2047_encoded_word (in, &word) && word) {
2106                 return word;
2107         } else {
2108                 return header_decode_atom (in);
2109         }
2110 }
2111
2112 static gchar *
2113 header_decode_value (const gchar **in)
2114 {
2115         const gchar *inptr = *in;
2116
2117         header_decode_lwsp (&inptr);
2118         if (*inptr == '"') {
2119                 d (printf ("decoding quoted string\n"));
2120                 return header_decode_quoted_string (in);
2121         } else if (camel_mime_is_ttoken (*inptr)) {
2122                 d (printf ("decoding token\n"));
2123                 /* this may not have the right specials for all params? */
2124                 return decode_token (in);
2125         }
2126         return NULL;
2127 }
2128
2129 /* should this return -1 for no int? */
2130
2131 /**
2132  * camel_header_decode_int:
2133  * @in: pointer to input string
2134  *
2135  * Extracts an integer token from @in and updates the pointer to point
2136  * to after the end of the integer token (sort of like strtol).
2137  *
2138  * Returns: the gint value
2139  **/
2140 gint
2141 camel_header_decode_int (const gchar **in)
2142 {
2143         const gchar *inptr = *in;
2144         gint c, v = 0;
2145
2146         header_decode_lwsp (&inptr);
2147         while ( (c=*inptr++ & 0xff)
2148                 && isdigit (c) ) {
2149                 v = v * 10 + (c - '0');
2150         }
2151         *in = inptr-1;
2152         return v;
2153 }
2154
2155 #define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
2156
2157 static gchar *
2158 hex_decode (const gchar *in,
2159             gsize len)
2160 {
2161         const guchar *inend = (const guchar *) (in + len);
2162         guchar *inptr, *outptr;
2163         gchar *outbuf;
2164
2165         outbuf = (gchar *) g_malloc (len + 1);
2166         outptr = (guchar *) outbuf;
2167
2168         inptr = (guchar *) in;
2169         while (inptr < inend) {
2170                 if (*inptr == '%') {
2171                         if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
2172                                 *outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
2173                                 inptr += 3;
2174                         } else
2175                                 *outptr++ = *inptr++;
2176                 } else
2177                         *outptr++ = *inptr++;
2178         }
2179
2180         *outptr = '\0';
2181
2182         return outbuf;
2183 }
2184
2185 /* Tries to convert @in @from charset @to charset.  Any failure, we get no data out rather than partial conversion */
2186 static gchar *
2187 header_convert (const gchar *to,
2188                 const gchar *from,
2189                 const gchar *in,
2190                 gsize inlen)
2191 {
2192         iconv_t ic;
2193         gsize outlen, ret;
2194         gchar *outbuf, *outbase, *result = NULL;
2195
2196         ic = camel_iconv_open (to, from);
2197         if (ic == (iconv_t) -1)
2198                 return NULL;
2199
2200         outlen = inlen * 6 + 16;
2201         outbuf = outbase = g_malloc (outlen);
2202
2203         ret = camel_iconv (ic, &in, &inlen, &outbuf, &outlen);
2204         if (ret != (gsize) -1) {
2205                 camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
2206                 *outbuf = '\0';
2207                 result = g_strdup (outbase);
2208         }
2209         camel_iconv_close (ic);
2210         g_free (outbase);
2211
2212         return result;
2213 }
2214
2215 /* an rfc2184 encoded string looks something like:
2216  * us-ascii'en'This%20is%20even%20more%20
2217  */
2218
2219 static gchar *
2220 rfc2184_decode (const gchar *in,
2221                 gsize len)
2222 {
2223         const gchar *inptr = in;
2224         const gchar *inend = in + len;
2225         const gchar *charset;
2226         gchar *decoded, *decword, *encoding;
2227
2228         inptr = memchr (inptr, '\'', len);
2229         if (!inptr)
2230                 return NULL;
2231
2232         encoding = g_alloca (inptr - in + 1);
2233         memcpy (encoding, in, inptr - in);
2234         encoding[inptr - in] = 0;
2235         charset = camel_iconv_charset_name (encoding);
2236
2237         inptr = memchr (inptr + 1, '\'', inend - inptr - 1);
2238         if (!inptr)
2239                 return NULL;
2240         inptr++;
2241         if (inptr >= inend)
2242                 return NULL;
2243
2244         decword = hex_decode (inptr, inend - inptr);
2245         decoded = header_convert ("UTF-8", charset, decword, strlen (decword));
2246         g_free (decword);
2247
2248         return decoded;
2249 }
2250
2251 /**
2252  * camel_header_param:
2253  * @params: parameters
2254  * @name: name of param to find
2255  *
2256  * Searches @params for a param named @name and gets the value.
2257  *
2258  * Returns: the value of the @name param
2259  **/
2260 gchar *
2261 camel_header_param (struct _camel_header_param *params,
2262                     const gchar *name)
2263 {
2264         while (params && params->name &&
2265                g_ascii_strcasecmp (params->name, name) != 0)
2266                 params = params->next;
2267         if (params)
2268                 return params->value;
2269
2270         return NULL;
2271 }
2272
2273 /**
2274  * camel_header_set_param:
2275  * @paramsp: poinetr to a list of params
2276  * @name: name of param to set
2277  * @value: value to set
2278  *
2279  * Set a parameter in the list.
2280  *
2281  * Returns: the set param
2282  **/
2283 struct _camel_header_param *
2284 camel_header_set_param (struct _camel_header_param **l,
2285                         const gchar *name,
2286                         const gchar *value)
2287 {
2288         struct _camel_header_param *p = (struct _camel_header_param *) l, *pn;
2289
2290         if (name == NULL)
2291                 return NULL;
2292
2293         while (p->next) {
2294                 pn = p->next;
2295                 if (!g_ascii_strcasecmp (pn->name, name)) {
2296                         g_free (pn->value);
2297                         if (value) {
2298                                 pn->value = g_strdup (value);
2299                                 return pn;
2300                         } else {
2301                                 p->next = pn->next;
2302                                 g_free (pn->name);
2303                                 g_free (pn);
2304                                 return NULL;
2305                         }
2306                 }
2307                 p = pn;
2308         }
2309
2310         if (value == NULL)
2311                 return NULL;
2312
2313         pn = g_malloc (sizeof (*pn));
2314         pn->next = NULL;
2315         pn->name = g_strdup (name);
2316         pn->value = g_strdup (value);
2317         p->next = pn;
2318
2319         return pn;
2320 }
2321
2322 /**
2323  * camel_content_type_param:
2324  * @content_type: a #CamelContentType
2325  * @name: name of param to find
2326  *
2327  * Searches the params on s #CamelContentType for a param named @name
2328  * and gets the value.
2329  *
2330  * Returns: the value of the @name param
2331  **/
2332 const gchar *
2333 camel_content_type_param (CamelContentType *t,
2334                           const gchar *name)
2335 {
2336         if (t == NULL)
2337                 return NULL;
2338         return camel_header_param (t->params, name);
2339 }
2340
2341 /**
2342  * camel_content_type_set_param:
2343  * @content_type: a #CamelContentType
2344  * @name: name of param to set
2345  * @value: value of param to set
2346  *
2347  * Set a parameter on @content_type.
2348  **/
2349 void
2350 camel_content_type_set_param (CamelContentType *t,
2351                               const gchar *name,
2352                               const gchar *value)
2353 {
2354         camel_header_set_param (&t->params, name, value);
2355 }
2356
2357 /**
2358  * camel_content_type_is:
2359  * @content_type: A content type specifier, or %NULL.
2360  * @type: A type to check against.
2361  * @subtype: A subtype to check against, or "*" to match any subtype.
2362  *
2363  * The subtype of "*" will match any subtype.  If @ct is %NULL, then
2364  * it will match the type "text/plain".
2365  *
2366  * Returns: %TRUE if the content type @ct is of type @type/@subtype or
2367  * %FALSE otherwise
2368  **/
2369 gint
2370 camel_content_type_is (CamelContentType *ct,
2371                        const gchar *type,
2372                        const gchar *subtype)
2373 {
2374         /* no type == text/plain or text/"*" */
2375         if (ct == NULL || (ct->type == NULL && ct->subtype == NULL)) {
2376                 return (!g_ascii_strcasecmp (type, "text")
2377                         && (!g_ascii_strcasecmp (subtype, "plain")
2378                         || !strcmp (subtype, "*")));
2379         }
2380
2381         return (ct->type != NULL
2382                 && (!g_ascii_strcasecmp (ct->type, type)
2383                 && ((ct->subtype != NULL
2384                 && !g_ascii_strcasecmp (ct->subtype, subtype))
2385                         || !strcmp ("*", subtype))));
2386 }
2387
2388 /**
2389  * camel_header_param_list_free:
2390  * @params: a list of params
2391  *
2392  * Free the list of params.
2393  **/
2394 void
2395 camel_header_param_list_free (struct _camel_header_param *p)
2396 {
2397         struct _camel_header_param *n;
2398
2399         while (p) {
2400                 n = p->next;
2401                 g_free (p->name);
2402                 g_free (p->value);
2403                 g_free (p);
2404                 p = n;
2405         }
2406 }
2407
2408 /**
2409  * camel_content_type_new:
2410  * @type: the major type of the new content-type
2411  * @subtype: the subtype
2412  *
2413  * Create a new #CamelContentType.
2414  *
2415  * Returns: the new #CamelContentType
2416  **/
2417 CamelContentType *
2418 camel_content_type_new (const gchar *type,
2419                         const gchar *subtype)
2420 {
2421         CamelContentType *t;
2422
2423         t = g_slice_new (CamelContentType);
2424         t->type = g_strdup (type);
2425         t->subtype = g_strdup (subtype);
2426         t->params = NULL;
2427         t->refcount = 1;
2428
2429         return t;
2430 }
2431
2432 /**
2433  * camel_content_type_ref:
2434  * @content_type: a #CamelContentType
2435  *
2436  * Refs the content type.
2437  **/
2438 void
2439 camel_content_type_ref (CamelContentType *ct)
2440 {
2441         if (ct)
2442                 ct->refcount++;
2443 }
2444
2445 /**
2446  * camel_content_type_unref:
2447  * @content_type: a #CamelContentType
2448  *
2449  * Unrefs, and potentially frees, the content type.
2450  **/
2451 void
2452 camel_content_type_unref (CamelContentType *ct)
2453 {
2454         if (ct) {
2455                 if (ct->refcount <= 1) {
2456                         camel_header_param_list_free (ct->params);
2457                         g_free (ct->type);
2458                         g_free (ct->subtype);
2459                         g_slice_free (CamelContentType, ct);
2460                         ct = NULL;
2461                 } else {
2462                         ct->refcount--;
2463                 }
2464         }
2465 }
2466
2467 /* for decoding email addresses, canonically */
2468 static gchar *
2469 header_decode_domain (const gchar **in)
2470 {
2471         const gchar *inptr = *in;
2472         gint go = TRUE;
2473         gchar *ret;
2474         GString *domain = g_string_new ("");
2475
2476         /* domain ref | domain literal */
2477         header_decode_lwsp (&inptr);
2478         while (go) {
2479                 if (*inptr == '[') { /* domain literal */
2480                         domain = g_string_append_c (domain, '[');
2481                         inptr++;
2482                         header_decode_lwsp (&inptr);
2483                         while (*inptr && camel_mime_is_dtext (*inptr)) {
2484                                 domain = g_string_append_c (domain, *inptr);
2485                                 inptr++;
2486                         }
2487                         if (*inptr == ']') {
2488                                 domain = g_string_append_c (domain, ']');
2489                                 inptr++;
2490                         } else {
2491                                 w (g_warning ("closing ']' not found in domain: %s", *in));
2492                         }
2493                 } else {
2494                         gchar *a = header_decode_atom (&inptr);
2495                         if (a) {
2496                                 domain = g_string_append (domain, a);
2497                                 g_free (a);
2498                         } else {
2499                                 w (g_warning ("missing atom from domain-ref"));
2500                                 break;
2501                         }
2502                 }
2503                 header_decode_lwsp (&inptr);
2504                 if (*inptr == '.') { /* next sub-domain? */
2505                         domain = g_string_append_c (domain, '.');
2506                         inptr++;
2507                         header_decode_lwsp (&inptr);
2508                 } else
2509                         go = FALSE;
2510         }
2511
2512         *in = inptr;
2513
2514         ret = domain->str;
2515         g_string_free (domain, FALSE);
2516         return ret;
2517 }
2518
2519 static gchar *
2520 header_decode_addrspec (const gchar **in)
2521 {
2522         const gchar *inptr = *in;
2523         gchar *word;
2524         GString *addr = g_string_new ("");
2525
2526         header_decode_lwsp (&inptr);
2527
2528         /* addr-spec */
2529         word = header_decode_word (&inptr);
2530         if (word) {
2531                 addr = g_string_append (addr, word);
2532                 header_decode_lwsp (&inptr);
2533                 g_free (word);
2534                 while (*inptr == '.' && word) {
2535                         inptr++;
2536                         addr = g_string_append_c (addr, '.');
2537                         word = header_decode_word (&inptr);
2538                         if (word) {
2539                                 addr = g_string_append (addr, word);
2540                                 header_decode_lwsp (&inptr);
2541                                 g_free (word);
2542                         } else {
2543                                 w (g_warning ("Invalid address spec: %s", *in));
2544                         }
2545                 }
2546                 if (*inptr == '@') {
2547                         inptr++;
2548                         addr = g_string_append_c (addr, '@');
2549                         word = header_decode_domain (&inptr);
2550                         if (word) {
2551                                 addr = g_string_append (addr, word);
2552                                 g_free (word);
2553                         } else {
2554                                 w (g_warning ("Invalid address, missing domain: %s", *in));
2555                         }
2556                 } else {
2557                         w (g_warning ("Invalid addr-spec, missing @: %s", *in));
2558                 }
2559         } else {
2560                 w (g_warning ("invalid addr-spec, no local part"));
2561                 g_string_free (addr, TRUE);
2562
2563                 return NULL;
2564         }
2565
2566         /* FIXME: return null on error? */
2567
2568         *in = inptr;
2569         word = addr->str;
2570         g_string_free (addr, FALSE);
2571         return word;
2572 }
2573
2574 /*
2575  * address:
2576  * word *('.' word) @ domain |
2577  * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain |
2578  *
2579  * 1 * word ':'[ word ... etc (mailbox, as above) ] ';'
2580  */
2581
2582 /* mailbox:
2583  * word *( '.' word ) '@' domain
2584  * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain
2585  * */
2586
2587 static struct _camel_header_address *
2588 header_decode_mailbox (const gchar **in,
2589                        const gchar *charset)
2590 {
2591         const gchar *inptr = *in;
2592         gchar *pre;
2593         gint closeme = FALSE;
2594         GString *addr;
2595         GString *name = NULL;
2596         struct _camel_header_address *address = NULL;
2597         const gchar *comment = NULL;
2598
2599         addr = g_string_new ("");
2600
2601         /* for each address */
2602         pre = header_decode_word (&inptr);
2603         header_decode_lwsp (&inptr);
2604         if (!(*inptr == '.' || *inptr == '@' || *inptr == ',' || *inptr == '\0')) {
2605                 /* ',' and '\0' required incase it is a simple address, no @ domain part (buggy writer) */
2606                 name = g_string_new ("");
2607                 while (pre) {
2608                         gchar *text, *last;
2609
2610                         /* perform internationalised decoding, and append */
2611                         text = camel_header_decode_string (pre, charset);
2612                         g_string_append (name, text);
2613                         last = pre;
2614                         g_free (text);
2615
2616                         pre = header_decode_word (&inptr);
2617                         if (pre) {
2618                                 gsize l = strlen (last);
2619                                 gsize p = strlen (pre);
2620
2621                                 /* dont append ' ' between sucsessive encoded words */
2622                                 if ((l > 6 && last[l - 2] == '?' && last[l - 1] == '=')
2623                                     && (p > 6 && pre[0] == '=' && pre[1] == '?')) {
2624                                         /* dont append ' ' */
2625                                 } else {
2626                                         name = g_string_append_c (name, ' ');
2627                                 }
2628                         } else {
2629                                 /* Fix for stupidly-broken-mailers that like to put '.''s in names unquoted */
2630                                 /* see bug #8147 */
2631                                 while (!pre && *inptr && *inptr != '<') {
2632                                         w (g_warning ("Working around stupid mailer bug #5: unescaped characters in names"));
2633                                         name = g_string_append_c (name, *inptr++);
2634                                         pre = header_decode_word (&inptr);
2635                                 }
2636                         }
2637                         g_free (last);
2638                 }
2639                 header_decode_lwsp (&inptr);
2640                 if (*inptr == '<') {
2641                         closeme = TRUE;
2642                 try_address_again:
2643                         inptr++;
2644                         header_decode_lwsp (&inptr);
2645                         if (*inptr == '@') {
2646                                 while (*inptr == '@') {
2647                                         inptr++;
2648                                         header_decode_domain (&inptr);
2649                                         header_decode_lwsp (&inptr);
2650                                         if (*inptr == ',') {
2651                                                 inptr++;
2652                                                 header_decode_lwsp (&inptr);
2653                                         }
2654                                 }
2655                                 if (*inptr == ':') {
2656                                         inptr++;
2657                                 } else {
2658                                         w (g_warning ("broken route-address, missing ':': %s", *in));
2659                                 }
2660                         }
2661                         pre = header_decode_word (&inptr);
2662                         /*header_decode_lwsp(&inptr);*/
2663                 } else {
2664                         w (g_warning ("broken address? %s", *in));
2665                 }
2666         }
2667
2668         if (pre) {
2669                 addr = g_string_append (addr, pre);
2670         } else {
2671                 w (g_warning ("No local-part for email address: %s", *in));
2672         }
2673
2674         /* should be at word '.' localpart */
2675         while (*inptr == '.' && pre) {
2676                 inptr++;
2677                 g_free (pre);
2678                 pre = header_decode_word (&inptr);
2679                 addr = g_string_append_c (addr, '.');
2680                 if (pre)
2681                         addr = g_string_append (addr, pre);
2682                 comment = inptr;
2683                 header_decode_lwsp (&inptr);
2684         }
2685         g_free (pre);
2686
2687         /* now at '@' domain part */
2688         if (*inptr == '@') {
2689                 gchar *dom;
2690
2691                 inptr++;
2692                 addr = g_string_append_c (addr, '@');
2693                 comment = inptr;
2694                 dom = header_decode_domain (&inptr);
2695                 addr = g_string_append (addr, dom);
2696                 g_free (dom);
2697         } else if (*inptr != '>' || !closeme) {
2698                 /* If we get a <, the address was probably a name part, lets try again shall we? */
2699                 /* Another fix for seriously-broken-mailers */
2700                 if (*inptr && *inptr != ',') {
2701                         gchar *text;
2702                         const gchar *name_part;
2703                         gboolean in_quote;
2704
2705                         w (g_warning ("We didn't get an '@' where we expected in '%s', trying again", *in));
2706                         w (g_warning ("Name is '%s', Addr is '%s' we're at '%s'\n", name ? name->str:"<UNSET>", addr->str, inptr));
2707
2708                         /* need to keep *inptr, as try_address_again will drop the current character */
2709                         if (*inptr == '<')
2710                                 closeme = TRUE;
2711                         else
2712                                 g_string_append_c (addr, *inptr);
2713
2714                         name_part = *in;
2715                         in_quote = FALSE;
2716                         while (*name_part && *name_part != ',') {
2717                                 if (*name_part == '\"')
2718                                         in_quote = !in_quote;
2719                                 else if (!in_quote && *name_part == '<')
2720                                         break;
2721                                 name_part++;
2722                         }
2723
2724                         if (*name_part == '<' && ((!strchr (name_part, ',') && strchr (name_part, '>')) || (strchr (name_part, ',') > strchr (name_part, '>')))) {
2725                                 /* it's of a form "display-name <addr-spec>" */
2726                                 if (name)
2727                                         g_string_free (name, TRUE);
2728                                 name = NULL;
2729                                 g_string_free (addr, TRUE);
2730
2731                                 if (name_part == *in)
2732                                         addr = g_string_new ("");
2733                                 else
2734                                         addr = g_string_new_len (*in, name_part - *in - (camel_mime_is_lwsp (name_part[-1]) ? 1 : 0));
2735                         }
2736
2737                         /* check for address is encoded word ... */
2738                         text = camel_header_decode_string (addr->str, charset);
2739                         if (name == NULL) {
2740                                 name = addr;
2741                                 addr = g_string_new ("");
2742                                 if (text) {
2743                                         g_string_truncate (name, 0);
2744                                         g_string_append (name, text);
2745                                 }
2746                         }/* else {
2747                                 g_string_append (name, text ? text : addr->str);
2748                                 g_string_truncate (addr, 0);
2749                         }*/
2750                         g_free (text);
2751
2752                         /* or maybe that we've added up a bunch of broken bits to make an encoded word */
2753                         if ((text = rfc2047_decode_word (name->str, name->len, charset))) {
2754                                 g_string_truncate (name, 0);
2755                                 g_string_append (name, text);
2756                                 g_free (text);
2757                         }
2758
2759                         goto try_address_again;
2760                 }
2761                 w (g_warning ("invalid address, no '@' domain part at %c: %s", *inptr, *in));
2762         }
2763
2764         if (closeme) {
2765                 header_decode_lwsp (&inptr);
2766                 if (*inptr == '>') {
2767                         inptr++;
2768                 } else {
2769                         w (g_warning ("invalid route address, no closing '>': %s", *in));
2770                 }
2771         } else if (name == NULL && comment != NULL && inptr>comment) { /* check for comment after address */
2772                 gchar *text, *tmp;
2773                 const gchar *comstart, *comend;
2774
2775                 /* this is a bit messy, we go from the last known position, because
2776                  * decode_domain/etc skip over any comments on the way */
2777                 /* FIXME: This wont detect comments inside the domain itself,
2778                  * but nobody seems to use that feature anyway ... */
2779
2780                 d (printf ("checking for comment from '%s'\n", comment));
2781
2782                 comstart = strchr (comment, '(');
2783                 if (comstart) {
2784                         comstart++;
2785                         header_decode_lwsp (&inptr);
2786                         comend = inptr - 1;
2787                         while (comend > comstart && comend[0] != ')')
2788                                 comend--;
2789
2790                         if (comend > comstart) {
2791                                 d (printf ("  looking at subset '%.*s'\n", comend - comstart, comstart));
2792                                 tmp = g_strndup (comstart, comend - comstart);
2793                                 text = camel_header_decode_string (tmp, charset);
2794                                 name = g_string_new (text);
2795                                 g_free (tmp);
2796                                 g_free (text);
2797                         }
2798                 }
2799         }
2800
2801         *in = inptr;
2802
2803         if (addr->len > 0) {
2804                 if (!g_utf8_validate (addr->str, addr->len, NULL)) {
2805                         /* workaround for invalid addr-specs containing 8bit chars (see bug #42170 for details) */
2806                         const gchar *locale_charset;
2807                         GString *out;
2808
2809                         locale_charset = camel_iconv_locale_charset ();
2810
2811                         out = g_string_new ("");
2812
2813                         if ((charset == NULL || !append_8bit (out, addr->str, addr->len, charset))
2814                             && (locale_charset == NULL || !append_8bit (out, addr->str, addr->len, locale_charset)))
2815                                 append_latin1 (out, addr->str, addr->len);
2816
2817                         g_string_free (addr, TRUE);
2818                         addr = out;
2819                 }
2820
2821                 if (!name) {
2822                         gchar *text;
2823
2824                         text = rfc2047_decode_word (addr->str, addr->len, charset);
2825                         if (text) {
2826                                 g_string_truncate (addr, 0);
2827                                 g_string_append (addr, text);
2828                                 g_free (text);
2829                         }
2830
2831                 }
2832
2833                 address = camel_header_address_new_name (name ? name->str : "", addr->str);
2834         }
2835
2836         d (printf ("got mailbox: %s\n", addr->str));
2837
2838         g_string_free (addr, TRUE);
2839         if (name)
2840                 g_string_free (name, TRUE);
2841
2842         return address;
2843 }
2844
2845 static struct _camel_header_address *
2846 header_decode_address (const gchar **in,
2847                        const gchar *charset)
2848 {
2849         const gchar *inptr = *in;
2850         gchar *pre;
2851         GString *group = g_string_new ("");
2852         struct _camel_header_address *addr = NULL, *member;
2853
2854         /* pre-scan, trying to work out format, discard results */
2855         header_decode_lwsp (&inptr);
2856         while ((pre = header_decode_word (&inptr))) {
2857                 group = g_string_append (group, pre);
2858                 group = g_string_append (group, " ");
2859                 g_free (pre);
2860         }
2861         header_decode_lwsp (&inptr);
2862         if (*inptr == ':') {
2863                 d (printf ("group detected: %s\n", group->str));
2864                 addr = camel_header_address_new_group (group->str);
2865                 /* that was a group spec, scan mailbox's */
2866                 inptr++;
2867                 /* FIXME: check rfc 2047 encodings of words, here or above in the loop */
2868                 header_decode_lwsp (&inptr);
2869                 if (*inptr != ';') {
2870                         gint go = TRUE;
2871                         do {
2872                                 member = header_decode_mailbox (&inptr, charset);
2873                                 if (member)
2874                                         camel_header_address_add_member (addr, member);
2875                                 header_decode_lwsp (&inptr);
2876                                 if (*inptr == ',')
2877                                         inptr++;
2878                                 else
2879                                         go = FALSE;
2880                         } while (go);
2881                         if (*inptr == ';') {
2882                                 inptr++;
2883                         } else {
2884                                 w (g_warning ("Invalid group spec, missing closing ';': %s", *in));
2885                         }
2886                 } else {
2887                         inptr++;
2888                 }
2889                 *in = inptr;
2890         } else {
2891                 addr = header_decode_mailbox (in, charset);
2892         }
2893
2894         g_string_free (group, TRUE);
2895
2896         return addr;
2897 }
2898
2899 static gchar *
2900 header_msgid_decode_internal (const gchar **in)
2901 {
2902         const gchar *inptr = *in;
2903         gchar *msgid = NULL;
2904
2905         d (printf ("decoding Message-ID: '%s'\n", *in));
2906
2907         header_decode_lwsp (&inptr);
2908         if (*inptr == '<') {
2909                 inptr++;
2910                 header_decode_lwsp (&inptr);
2911                 msgid = header_decode_addrspec (&inptr);
2912                 if (msgid) {
2913                         header_decode_lwsp (&inptr);
2914                         if (*inptr == '>') {
2915                                 inptr++;
2916                         } else {
2917                                 w (g_warning ("Missing closing '>' on message id: %s", *in));
2918                         }
2919                 } else {
2920                         w (g_warning ("Cannot find message id in: %s", *in));
2921                 }
2922         } else {
2923                 w (g_warning ("missing opening '<' on message id: %s", *in));
2924         }
2925         *in = inptr;
2926
2927         return msgid;
2928 }
2929
2930 /**
2931  * camel_header_msgid_decode:
2932  * @in: input string
2933  *
2934  * Extract a message-id token from @in.
2935  *
2936  * Returns: the msg-id
2937  **/
2938 gchar *
2939 camel_header_msgid_decode (const gchar *in)
2940 {
2941         if (in == NULL)
2942                 return NULL;
2943
2944         return header_msgid_decode_internal (&in);
2945 }
2946
2947 /**
2948  * camel_header_contentid_decode:
2949  * @in: input string
2950  *
2951  * Extract a content-id from @in.
2952  *
2953  * Returns: the extracted content-id
2954  **/
2955 gchar *
2956 camel_header_contentid_decode (const gchar *in)
2957 {
2958         const gchar *inptr = in;
2959         gboolean at = FALSE;
2960         GString *addr;
2961         gchar *buf;
2962
2963         d (printf ("decoding Content-ID: '%s'\n", in));
2964
2965         header_decode_lwsp (&inptr);
2966
2967         /* some lame mailers quote the Content-Id */
2968         if (*inptr == '"')
2969                 inptr++;
2970
2971         /* make sure the content-id is not "" which can happen if we get a
2972          * content-id such as <.@> (which Eudora likes to use...) */
2973         if ((buf = camel_header_msgid_decode (inptr)) != NULL && *buf)
2974                 return buf;
2975
2976         g_free (buf);
2977
2978         /* ugh, not a valid msg-id - try to get something useful out of it then? */
2979         inptr = in;
2980         header_decode_lwsp (&inptr);
2981         if (*inptr == '<') {
2982                 inptr++;
2983                 header_decode_lwsp (&inptr);
2984         }
2985
2986         /* Eudora has been known to use <.@> as a content-id */
2987         if (!(buf = header_decode_word (&inptr)) && !strchr (".@", *inptr))
2988                 return NULL;
2989
2990         addr = g_string_new ("");
2991         header_decode_lwsp (&inptr);
2992         while (buf != NULL || *inptr == '.' || (*inptr == '@' && !at)) {
2993                 if (buf != NULL) {
2994                         g_string_append (addr, buf);
2995                         g_free (buf);
2996                         buf = NULL;
2997                 }
2998
2999                 if (!at) {
3000                         if (*inptr == '.') {
3001                                 g_string_append_c (addr, *inptr++);
3002                                 buf = header_decode_word (&inptr);
3003                         } else if (*inptr == '@') {
3004                                 g_string_append_c (addr, *inptr++);
3005                                 buf = header_decode_word (&inptr);
3006                                 at = TRUE;
3007                         }
3008                 } else if (strchr (".[]", *inptr)) {
3009                         g_string_append_c (addr, *inptr++);
3010                         buf = header_decode_atom (&inptr);
3011                 }
3012
3013                 header_decode_lwsp (&inptr);
3014         }
3015
3016         buf = addr->str;
3017         g_string_free (addr, FALSE);
3018
3019         return buf;
3020 }
3021
3022 void
3023 camel_header_references_list_append_asis (struct _camel_header_references **list,
3024                                           gchar *ref)
3025 {
3026         struct _camel_header_references *w = (struct _camel_header_references *) list, *n;
3027         while (w->next)
3028                 w = w->next;
3029         n = g_malloc (sizeof (*n));
3030         n->id = ref;
3031         n->next = NULL;
3032         w->next = n;
3033 }
3034
3035 gint
3036 camel_header_references_list_size (struct _camel_header_references **list)
3037 {
3038         gint count = 0;
3039         struct _camel_header_references *w = *list;
3040         while (w) {
3041                 count++;
3042                 w = w->next;
3043         }
3044         return count;
3045 }
3046
3047 void
3048 camel_header_references_list_clear (struct _camel_header_references **list)
3049 {
3050         struct _camel_header_references *w = *list, *n;
3051         while (w) {
3052                 n = w->next;
3053                 g_free (w->id);
3054                 g_free (w);
3055                 w = n;
3056         }
3057         *list = NULL;
3058 }
3059
3060 static void
3061 header_references_decode_single (const gchar **in,
3062                                  struct _camel_header_references **head)
3063 {
3064         struct _camel_header_references *ref;
3065         const gchar *inptr = *in;
3066         gchar *id, *word;
3067
3068         while (*inptr) {
3069                 header_decode_lwsp (&inptr);
3070                 if (*inptr == '<') {
3071                         id = header_msgid_decode_internal (&inptr);
3072                         if (id) {
3073                                 ref = g_malloc (sizeof (struct _camel_header_references));
3074                                 ref->next = *head;
3075                                 ref->id = id;
3076                                 *head = ref;
3077                                 break;
3078                         }
3079                 } else {
3080                         word = header_decode_word (&inptr);
3081                         if (word)
3082                                 g_free (word);
3083                         else if (*inptr != '\0')
3084                                 inptr++; /* Stupid mailer tricks */
3085                 }
3086         }
3087
3088         *in = inptr;
3089 }
3090
3091 /* TODO: why is this needed?  Can't the other interface also work? */
3092 struct _camel_header_references *
3093 camel_header_references_inreplyto_decode (const gchar *in)
3094 {
3095         struct _camel_header_references *ref = NULL;
3096
3097         if (in == NULL || in[0] == '\0')
3098                 return NULL;
3099
3100         header_references_decode_single (&in, &ref);
3101
3102         return ref;
3103 }
3104
3105 /* generate a list of references, from most recent up */
3106 struct _camel_header_references *
3107 camel_header_references_decode (const gchar *in)
3108 {
3109         struct _camel_header_references *refs = NULL;
3110
3111         if (in == NULL || in[0] == '\0')
3112                 return NULL;
3113
3114         while (*in)
3115                 header_references_decode_single (&in, &refs);
3116
3117         return refs;
3118 }
3119
3120 struct _camel_header_references *
3121 camel_header_references_dup (const struct _camel_header_references *list)
3122 {
3123         struct _camel_header_references *new = NULL, *tmp;
3124
3125         while (list) {
3126                 tmp = g_new (struct _camel_header_references, 1);
3127                 tmp->next = new;
3128                 tmp->id = g_strdup (list->id);
3129                 new = tmp;
3130                 list = list->next;
3131         }
3132         return new;
3133 }
3134
3135 struct _camel_header_address *
3136 camel_header_mailbox_decode (const gchar *in,
3137                              const gchar *charset)
3138 {
3139         if (in == NULL)
3140                 return NULL;
3141
3142         return header_decode_mailbox (&in, charset);
3143 }
3144
3145 struct _camel_header_address *
3146 camel_header_address_decode (const gchar *in,
3147                              const gchar *charset)
3148 {
3149         const gchar *inptr = in, *last;
3150         struct _camel_header_address *list = NULL, *addr;
3151
3152         d (printf ("decoding To: '%s'\n", in));
3153
3154         if (in == NULL)
3155                 return NULL;
3156
3157         header_decode_lwsp (&inptr);
3158         if (*inptr == 0)
3159                 return NULL;
3160
3161         do {
3162                 last = inptr;
3163                 addr = header_decode_address (&inptr, charset);
3164                 if (addr)
3165                         camel_header_address_list_append (&list, addr);
3166                 header_decode_lwsp (&inptr);
3167                 if (*inptr == ',')
3168                         inptr++;
3169                 else
3170                         break;
3171         } while (inptr != last);
3172
3173         if (*inptr) {
3174                 w (g_warning ("Invalid input detected at %c (%d): %s\n or at: %s", *inptr, inptr - in, in, inptr));
3175         }
3176
3177         if (inptr == last) {
3178                 w (g_warning ("detected invalid input loop at : %s", last));
3179         }
3180
3181         return list;
3182 }
3183
3184 struct _camel_header_newsgroup *
3185 camel_header_newsgroups_decode (const gchar *in)
3186 {
3187         const gchar *inptr = in;
3188         register gchar c;
3189         struct _camel_header_newsgroup *head, *last, *ng;
3190         const gchar *start;
3191
3192         head = NULL;
3193         last = (struct _camel_header_newsgroup *) &head;
3194
3195         do {
3196                 header_decode_lwsp (&inptr);
3197                 start = inptr;
3198                 while ((c = *inptr++) && !camel_mime_is_lwsp (c) && c != ',')
3199                         ;
3200                 if (start != inptr - 1) {
3201                         ng = g_malloc (sizeof (*ng));
3202                         ng->newsgroup = g_strndup (start, inptr - start - 1);
3203                         ng->next = NULL;
3204                         last->next = ng;
3205                         last = ng;
3206                 }
3207         } while (c);
3208
3209         return head;
3210 }
3211
3212 void
3213 camel_header_newsgroups_free (struct _camel_header_newsgroup *ng)
3214 {
3215         while (ng) {
3216                 struct _camel_header_newsgroup *nng = ng->next;
3217
3218                 g_free (ng->newsgroup);
3219                 g_free (ng);
3220                 ng = nng;
3221         }
3222 }
3223
3224 /* this must be kept in sync with the header */
3225 static const gchar *encodings[] = {
3226         "",
3227         "7bit",
3228         "8bit",
3229         "base64",
3230         "quoted-printable",
3231         "binary",
3232         "x-uuencode",
3233 };
3234
3235 const gchar *
3236 camel_transfer_encoding_to_string (CamelTransferEncoding encoding)
3237 {
3238         if (encoding >= G_N_ELEMENTS (encodings))
3239                 encoding = 0;
3240
3241         return encodings[encoding];
3242 }
3243
3244 CamelTransferEncoding
3245 camel_transfer_encoding_from_string (const gchar *string)
3246 {
3247         gint i;
3248
3249         if (string != NULL) {
3250                 for (i = 0; i < G_N_ELEMENTS (encodings); i++)
3251                         if (!g_ascii_strcasecmp (string, encodings[i]))
3252                                 return i;
3253         }
3254
3255         return CAMEL_TRANSFER_ENCODING_DEFAULT;
3256 }
3257
3258 void
3259 camel_header_mime_decode (const gchar *in,
3260                           gint *maj,
3261                           gint *min)
3262 {
3263         const gchar *inptr = in;
3264         gint major=-1, minor=-1;
3265
3266         d (printf ("decoding MIME-Version: '%s'\n", in));
3267
3268         if (in != NULL) {
3269                 header_decode_lwsp (&inptr);
3270                 if (isdigit (*inptr)) {
3271                         major = camel_header_decode_int (&inptr);
3272                         header_decode_lwsp (&inptr);
3273                         if (*inptr == '.') {
3274                                 inptr++;
3275                                 header_decode_lwsp (&inptr);
3276                                 if (isdigit (*inptr))
3277                                         minor = camel_header_decode_int (&inptr);
3278                         }
3279                 }
3280         }
3281
3282         if (maj)
3283                 *maj = major;
3284         if (min)
3285                 *min = minor;
3286
3287         d (printf ("major = %d, minor = %d\n", major, minor));
3288 }
3289
3290 struct _rfc2184_param {
3291         struct _camel_header_param param;
3292         gint index;
3293 };
3294
3295 static gint
3296 rfc2184_param_cmp (gconstpointer ap,
3297                    gconstpointer bp)
3298 {
3299         const struct _rfc2184_param *a = *(gpointer *) ap;
3300         const struct _rfc2184_param *b = *(gpointer *) bp;
3301         gint res;
3302
3303         res = strcmp (a->param.name, b->param.name);
3304         if (res == 0) {
3305                 if (a->index > b->index)
3306                         res = 1;
3307                 else if (a->index < b->index)
3308                         res = -1;
3309         }
3310
3311         return res;
3312 }
3313
3314 /* NB: Steals name and value */
3315 static struct _camel_header_param *
3316 header_append_param (struct _camel_header_param *last,
3317                      gchar *name,
3318                      gchar *value)
3319 {
3320         struct _camel_header_param *node;
3321
3322         /* This handles -
3323          *  8 bit data in parameters, illegal, tries to convert using locale, or just safens it up.
3324          *  rfc2047 ecoded parameters, illegal, decodes them anyway.  Some Outlook & Mozilla do this?
3325         */
3326         node = g_malloc (sizeof (*node));
3327         last->next = node;
3328         node->next = NULL;
3329         node->name = name;
3330         if (strncmp (value, "=?", 2) == 0
3331             && (node->value = header_decode_text (value, FALSE, NULL))) {
3332                 g_free (value);
3333         } else if (g_ascii_strcasecmp (name, "boundary") != 0 && !g_utf8_validate (value, -1, NULL)) {
3334                 const gchar *charset = camel_iconv_locale_charset ();
3335
3336                 if ((node->value = header_convert ("UTF-8", charset ? charset:"ISO-8859-1", value, strlen (value)))) {
3337                         g_free (value);
3338                 } else {
3339                         node->value = value;
3340                         for (;*value; value++)
3341                                 if (!isascii ((guchar) * value))
3342                                         *value = '_';
3343                 }
3344         } else
3345                 node->value = value;
3346
3347         return node;
3348 }
3349
3350 static struct _camel_header_param *
3351 header_decode_param_list (const gchar **in)
3352 {
3353         struct _camel_header_param *head = NULL, *last = (struct _camel_header_param *) &head;
3354         GPtrArray *split = NULL;
3355         const gchar *inptr = *in;
3356         struct _rfc2184_param *work;
3357         gchar *tmp;
3358
3359         /* Dump parameters into the output list, in the order found.  RFC 2184 split parameters are kept in an array */
3360         header_decode_lwsp (&inptr);
3361         while (*inptr == ';') {
3362                 gchar *name;
3363                 gchar *value = NULL;
3364
3365                 inptr++;
3366                 name = decode_token (&inptr);
3367                 header_decode_lwsp (&inptr);
3368                 if (*inptr == '=') {
3369                         inptr++;
3370                         value = header_decode_value (&inptr);
3371                 }
3372
3373                 if (name && value) {
3374                         gchar *index = strchr (name, '*');
3375
3376                         if (index) {
3377                                 if (index[1] == 0) {
3378                                         /* VAL*="foo", decode immediately and append */
3379                                         *index = 0;
3380                                         tmp = rfc2184_decode (value, strlen (value));
3381                                         if (tmp) {
3382                                                 g_free (value);
3383                                                 value = tmp;
3384                                         }
3385                                         last = header_append_param (last, name, value);
3386                                 } else {
3387                                         /* VAL*1="foo", save for later */
3388                                         *index++ = 0;
3389                                         work = g_malloc (sizeof (*work));
3390                                         work->param.name = name;
3391                                         work->param.value = value;
3392                                         work->index = atoi (index);
3393                                         if (split == NULL)
3394                                                 split = g_ptr_array_new ();
3395                                         g_ptr_array_add (split, work);
3396                                 }
3397                         } else {
3398                                 last = header_append_param (last, name, value);
3399                         }
3400                 } else {
3401                         g_free (name);
3402                         g_free (value);
3403                 }
3404
3405                 header_decode_lwsp (&inptr);
3406         }
3407
3408         /* Rejoin any RFC 2184 split parameters in the proper order */
3409         /* Parameters with the same index will be concatenated in undefined order */
3410         if (split) {
3411                 GString *value = g_string_new ("");
3412                 struct _rfc2184_param *first;
3413                 gint i;
3414
3415                 qsort (split->pdata, split->len, sizeof (split->pdata[0]), rfc2184_param_cmp);
3416                 first = split->pdata[0];
3417                 for (i = 0; i < split->len; i++) {
3418                         work = split->pdata[i];
3419                         if (split->len - 1 == i)
3420                                 g_string_append (value, work->param.value);
3421                         if (split->len - 1 == i || strcmp (work->param.name, first->param.name) != 0) {
3422                                 tmp = rfc2184_decode (value->str, value->len);
3423                                 if (tmp == NULL)
3424                                         tmp = g_strdup (value->str);
3425
3426                                 last = header_append_param (last, g_strdup (first->param.name), tmp);
3427                                 g_string_truncate (value, 0);
3428                                 first = work;
3429                         }
3430                         if (split->len - 1 != i)
3431                                 g_string_append (value, work->param.value);
3432                 }
3433                 g_string_free (value, TRUE);
3434                 for (i = 0; i < split->len; i++) {
3435                         work = split->pdata[i];
3436                         g_free (work->param.name);
3437                         g_free (work->param.value);
3438                         g_free (work);
3439                 }
3440                 g_ptr_array_free (split, TRUE);
3441         }
3442
3443         *in = inptr;
3444
3445         return head;
3446 }
3447
3448 struct _camel_header_param *
3449 camel_header_param_list_decode (const gchar *in)
3450 {
3451         if (in == NULL)
3452                 return NULL;
3453
3454         return header_decode_param_list (&in);
3455 }
3456
3457 static gchar *
3458 header_encode_param (const guchar *in,
3459                      gboolean *encoded,
3460                      gboolean is_filename)
3461 {
3462         const guchar *inptr = in;
3463         guchar *outbuf = NULL;
3464         const gchar *charset;
3465         GString *out;
3466         guint32 c;
3467         gchar *str;
3468
3469         *encoded = FALSE;
3470
3471         g_return_val_if_fail (in != NULL, NULL);
3472
3473         if (is_filename) {
3474                 if (!g_utf8_validate ((gchar *) inptr, -1, NULL)) {
3475                         GString *buff = g_string_new ("");
3476
3477                         for (; inptr && *inptr; inptr++) {
3478                                 if (*inptr < 32)
3479                                         g_string_append_printf (buff, "%%%02X", (*inptr) & 0xFF);
3480                                 else
3481                                         g_string_append_c (buff, *inptr);
3482                         }
3483
3484                         outbuf = (guchar *) g_string_free (buff, FALSE);
3485                         inptr = outbuf;
3486                 }
3487
3488                 /* do not set encoded flag for file names */
3489                 str = header_encode_string_rfc2047 (inptr, TRUE);
3490                 g_free (outbuf);
3491
3492                 return str;
3493         }
3494
3495         /* if we have really broken utf8 passed in, we just treat it as binary data */
3496
3497         charset = camel_charset_best ((gchar *) in, strlen ((gchar *) in));
3498         if (charset == NULL) {
3499                 return g_strdup ((gchar *) in);
3500         }
3501
3502         if (g_ascii_strcasecmp (charset, "UTF-8") != 0) {
3503                 if ((outbuf = (guchar *) header_convert (charset, "UTF-8", (const gchar *) in, strlen ((gchar *) in))))
3504                         inptr = outbuf;
3505                 else
3506                         return g_strdup ((gchar *) in);
3507         }
3508
3509         /* FIXME: set the 'language' as well, assuming we can get that info...? */
3510         out = g_string_new (charset);
3511         g_string_append (out, "''");
3512
3513         while ((c = *inptr++)) {
3514                 if (camel_mime_is_attrchar (c))
3515                         g_string_append_c (out, c);
3516                 else
3517                         g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
3518         }
3519         g_free (outbuf);
3520
3521         str = out->str;
3522         g_string_free (out, FALSE);
3523         *encoded = TRUE;
3524
3525         return str;
3526 }
3527
3528 /* HACK: Set to non-zero to have the values of the 'filename' and 'name'
 * parameters encoded the RFC 2047 way; when left at zero they are encoded
 * the correct RFC 2231 way.  The switch exists because Outlook and GMail do
 * not understand the correct standard and refuse attachments with localized
 * names sent from Evolution.  This seems to have been fixed in Exchange 2007
 * at least - not sure about standalone Outlook.
 *
 * The flag is read by camel_header_param_list_format_append(), which in that
 * mode also always quotes these two parameters and never folds them. */
3533 gint camel_header_param_encode_filenames_in_rfc_2047 = 0;
3534
3535 void
3536 camel_header_param_list_format_append (GString *out,
3537                                        struct _camel_header_param *p)
3538 {
3539         gint used = out->len;
3540
3541         while (p) {
3542                 gboolean is_filename = camel_header_param_encode_filenames_in_rfc_2047 && (g_ascii_strcasecmp (p->name, "filename") == 0 || g_ascii_strcasecmp (p->name, "name") == 0);
3543                 gboolean encoded = FALSE;
3544                 gboolean quote = FALSE;
3545                 gint here = out->len;
3546                 gsize nlen, vlen;
3547                 gchar *value;
3548
3549                 if (!p->value) {
3550                         p = p->next;
3551                         continue;
3552                 }
3553
3554                 value = header_encode_param ((guchar *) p->value, &encoded, is_filename);
3555                 if (!value) {
3556                         w (g_warning ("appending parameter %s=%s violates rfc2184", p->name, p->value));
3557                         value = g_strdup (p->value);
3558                 }
3559
3560                 if (!encoded) {
3561                         gchar *ch;
3562
3563                         for (ch = value; ch && *ch; ch++) {
3564                                 if (camel_mime_is_tspecial (*ch) || camel_mime_is_lwsp (*ch))
3565                                         break;
3566                         }
3567
3568                         quote = ch && *ch;
3569                 }
3570
3571                 quote = quote || is_filename;
3572                 nlen = strlen (p->name);
3573                 vlen = strlen (value);
3574
3575                 /* do not fold file names */
3576                 if (!is_filename && used + nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3577                         out = g_string_append (out, ";\n\t");
3578                         here = out->len;
3579                         used = 0;
3580                 } else
3581                         out = g_string_append (out, "; ");
3582
3583                 if (!is_filename && nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3584                         /* we need to do special rfc2184 parameter wrapping */
3585                         gint maxlen = CAMEL_FOLD_SIZE - (nlen + 8);
3586                         gchar *inptr, *inend;
3587                         gint i = 0;
3588
3589                         inptr = value;
3590                         inend = value + vlen;
3591
3592                         while (inptr < inend) {
3593                                 gchar *ptr = inptr + MIN (inend - inptr, maxlen);
3594
3595                                 if (encoded && ptr < inend) {
3596                                         /* be careful not to break an encoded gchar (ie %20) */
3597                                         gchar *q = ptr;
3598                                         gint j = 2;
3599
3600                                         for (; j > 0 && q > inptr && *q != '%'; j--, q--);
3601                                         if (*q == '%')
3602                                                 ptr = q;
3603                                 }
3604
3605                                 if (i != 0) {
3606                                         g_string_append (out, ";\n\t");
3607                                         here = out->len;
3608                                         used = 0;
3609                                 }
3610
3611                                 g_string_append_printf (out, "%s*%d%s=", p->name, i++, encoded ? "*" : "");
3612                                 if (encoded || !quote)
3613                                         g_string_append_len (out, inptr, ptr - inptr);
3614                                 else
3615                                         quote_word (out, TRUE, inptr, ptr - inptr);
3616
3617                                 d (printf ("wrote: %s\n", out->str + here));
3618
3619                                 used += (out->len - here);
3620
3621                                 inptr = ptr;
3622                         }
3623                 } else {
3624                         g_string_append_printf (out, "%s%s=", p->name, encoded ? "*" : "");
3625
3626                         /* Quote even if we don't need to in order to
3627                          * work around broken mail software like the
3628                          * Jive Forums' NNTP gateway */
3629                         if (encoded /*|| !quote */)
3630                                 g_string_append (out, value);
3631                         else
3632                                 quote_word (out, TRUE, value, vlen);
3633
3634                         used += (out->len - here);
3635                 }
3636
3637                 g_free (value);
3638
3639                 p = p->next;
3640         }
3641 }
3642
3643 gchar *
3644 camel_header_param_list_format (struct _camel_header_param *p)
3645 {
3646         GString *out = g_string_new ("");
3647         gchar *ret;
3648
3649         camel_header_param_list_format_append (out, p);
3650         ret = out->str;
3651         g_string_free (out, FALSE);
3652         return ret;
3653 }
3654
3655 CamelContentType *
3656 camel_content_type_decode (const gchar *in)
3657 {
3658         const gchar *inptr = in;
3659         gchar *type, *subtype = NULL;
3660         CamelContentType *t = NULL;
3661
3662         if (in == NULL)
3663                 return NULL;
3664
3665         type = decode_token (&inptr);
3666         header_decode_lwsp (&inptr);
3667         if (type) {
3668                 if  (*inptr == '/') {
3669                         inptr++;
3670                         subtype = decode_token (&inptr);
3671                 }
3672                 if (subtype == NULL && (!g_ascii_strcasecmp (type, "text"))) {
3673                         w (g_warning ("text type with no subtype, resorting to text/plain: %s", in));
3674                         subtype = g_strdup ("plain");
3675                 }
3676                 if (subtype == NULL) {
3677                         w (g_warning ("MIME type with no subtype: %s", in));
3678                 }
3679
3680                 t = camel_content_type_new (type, subtype);
3681                 t->params = header_decode_param_list (&inptr);
3682                 g_free (type);
3683                 g_free (subtype);
3684         } else {
3685                 g_free (type);
3686                 d (printf ("cannot find MIME type in header (2) '%s'", in));
3687         }
3688         return t;
3689 }
3690
3691 void
3692 camel_content_type_dump (CamelContentType *ct)
3693 {
3694         struct _camel_header_param *p;
3695
3696         printf ("Content-Type: ");
3697         if (ct == NULL) {
3698                 printf ("<NULL>\n");
3699                 return;
3700         }
3701         printf ("%s / %s", ct->type, ct->subtype);
3702         p = ct->params;
3703         if (p) {
3704                 while (p) {
3705                         printf (";\n\t%s=\"%s\"", p->name, p->value);
3706                         p = p->next;
3707                 }
3708         }
3709         printf ("\n");
3710 }
3711
3712 gchar *
3713 camel_content_type_format (CamelContentType *ct)
3714 {
3715         GString *out;
3716         gchar *ret;
3717
3718         if (ct == NULL)
3719                 return NULL;
3720
3721         out = g_string_new ("");
3722         if (ct->type == NULL) {
3723                 g_string_append_printf (out, "text/plain");
3724                 w (g_warning ("Content-Type with no main type"));
3725         } else if (ct->subtype == NULL) {
3726                 w (g_warning ("Content-Type with no sub type: %s", ct->type));
3727                 if (!g_ascii_strcasecmp (ct->type, "multipart"))
3728                         g_string_append_printf (out, "%s/mixed", ct->type);
3729                 else
3730                         g_string_append_printf (out, "%s", ct->type);
3731         } else {
3732                 g_string_append_printf (out, "%s/%s", ct->type, ct->subtype);
3733         }
3734         camel_header_param_list_format_append (out, ct->params);
3735
3736         ret = out->str;
3737         g_string_free (out, FALSE);
3738
3739         return ret;
3740 }
3741
3742 gchar *
3743 camel_content_type_simple (CamelContentType *ct)
3744 {
3745         if (ct->type == NULL) {
3746                 w (g_warning ("Content-Type with no main type"));
3747                 return g_strdup ("text/plain");
3748         } else if (ct->subtype == NULL) {
3749                 w (g_warning ("Content-Type with no sub type: %s", ct->type));
3750                 if (!g_ascii_strcasecmp (ct->type, "multipart"))
3751                         return g_strdup_printf ("%s/mixed", ct->type);
3752                 else
3753                         return g_strdup (ct->type);
3754         } else
3755                 return g_strdup_printf ("%s/%s", ct->type, ct->subtype);
3756 }
3757
3758 gchar *
3759 camel_content_transfer_encoding_decode (const gchar *in)
3760 {
3761         if (in)
3762                 return decode_token (&in);
3763
3764         return NULL;
3765 }
3766
3767 CamelContentDisposition *
3768 camel_content_disposition_decode (const gchar *in)
3769 {
3770         CamelContentDisposition *d = NULL;
3771         const gchar *inptr = in;
3772
3773         if (in == NULL)
3774                 return NULL;
3775
3776         d = g_malloc (sizeof (*d));
3777         d->refcount = 1;
3778         d->disposition = decode_token (&inptr);
3779         if (d->disposition == NULL) {
3780                 w (g_warning ("Empty disposition type"));
3781         }
3782         d->params = header_decode_param_list (&inptr);
3783         return d;
3784 }
3785
3786 void
3787 camel_content_disposition_ref (CamelContentDisposition *d)
3788 {
3789         if (d)
3790                 d->refcount++;
3791 }
3792
3793 void
3794 camel_content_disposition_unref (CamelContentDisposition *d)
3795 {
3796         if (d) {
3797                 if (d->refcount <= 1) {
3798                         camel_header_param_list_free (d->params);
3799                         g_free (d->disposition);
3800                         g_free (d);
3801                 } else {
3802                         d->refcount--;
3803                 }
3804         }
3805 }
3806
3807 gchar *
3808 camel_content_disposition_format (CamelContentDisposition *d)
3809 {
3810         GString *out;
3811         gchar *ret;
3812
3813         if (d == NULL)
3814                 return NULL;
3815
3816         out = g_string_new ("");
3817         if (d->disposition)
3818                 out = g_string_append (out, d->disposition);
3819         else
3820                 out = g_string_append (out, "attachment");
3821         camel_header_param_list_format_append (out, d->params);
3822
3823         ret = out->str;
3824         g_string_free (out, FALSE);
3825         return ret;
3826 }
3827
3828 /* date parser macros */
/* The *_CHARS strings list the characters that may occur in each class of
 * date token.  NOTE(review): camel_datetok_table below appears to be a
 * precomputed per-byte form of these sets (spot checks of digits, ':', '+',
 * '-', '(' and several letters agree) - confirm before changing either. */
3829 #define NUMERIC_CHARS          "1234567890"
3830 #define WEEKDAY_CHARS          "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
3831 #define MONTH_CHARS            "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
3832 #define TIMEZONE_ALPHA_CHARS   "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
3833 #define TIMEZONE_NUMERIC_CHARS "-+1234567890"
3834 #define TIME_CHARS             "1234567890:"
3835
/* Bits of a token mask.  The NON_* bits are negative flags: a set bit means
 * the token contains a character that rules the class out, so the is_*()
 * macros further down test for the bit being clear.  HAS_COLON and HAS_SIGN
 * are positive flags that is_time() and is_tzone_numeric() additionally
 * require to be set. */
3836 #define DATE_TOKEN_NON_NUMERIC          (1 << 0)
3837 #define DATE_TOKEN_NON_WEEKDAY          (1 << 1)
3838 #define DATE_TOKEN_NON_MONTH            (1 << 2)
3839 #define DATE_TOKEN_NON_TIME             (1 << 3)
3840 #define DATE_TOKEN_HAS_COLON            (1 << 4)
3841 #define DATE_TOKEN_NON_TIMEZONE_ALPHA   (1 << 5)
3842 #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
3843 #define DATE_TOKEN_HAS_SIGN             (1 << 7)
3844
/* Flag mask for every possible byte value, indexed by the byte itself.
 * Each entry is an OR of DATE_TOKEN_* bits; datetok() ORs together the
 * entries of all characters of a token to obtain the token's mask.
 *
 * Examples: the digits '0'..'9' map to 38 (NON_WEEKDAY | NON_MONTH |
 * NON_TIMEZONE_ALPHA); ':' maps to 119, which has HAS_COLON set and NON_TIME
 * clear; '+' and '-' map to 175, which has HAS_SIGN set and
 * NON_TIMEZONE_NUMERIC clear; most other bytes map to 111, i.e. every NON_*
 * bit set. */
3845 static guchar camel_datetok_table[256] = {
3846         128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3847         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3848         111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
3849          38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
3850         111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
3851          79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
3852         111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
3853         107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
3854         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3855         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3856         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3857         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3858         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3859         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3860         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3861         111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3862 };
3863
/* Named timezones understood by get_tzone(), which matches them against a
 * token with a length check followed by strncmp(), i.e. case-sensitively.
 * Offsets use the same signed decimal +-HHMM form as numeric zones (-500
 * stands for -05:00); parse_rfc822_date() splits them with / 100 and % 100. */
3864 static struct {
3865         const gchar *name;
3866         gint offset;
3867 } tz_offsets[] = {
3868         { "UT", 0 },
3869         { "GMT", 0 },
3870         { "EST", -500 },        /* these are all US timezones.  bloody yanks */
3871         { "EDT", -400 },
3872         { "CST", -600 },
3873         { "CDT", -500 },
3874         { "MST", -700 },
3875         { "MDT", -600 },
3876         { "PST", -800 },
3877         { "PDT", -700 },
3878         { "Z", 0 },
3879         { "A", -100 },
3880         { "M", -1200 },
3881         { "N", 100 },
3882         { "Y", 1200 },
3883 };
3884
/* Three-letter month names indexed by struct tm's tm_mon (0 = Jan); used by
 * camel_header_format_date() for output and by get_month() for matching. */
3885 static const gchar tm_months[][4] = {
3886         "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3887         "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
3888 };
3889
/* Three-letter weekday names indexed by struct tm's tm_wday (0 = Sun); used
 * by camel_header_format_date() for output and by get_wday() for matching. */
3890 static const gchar tm_days[][4] = {
3891         "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
3892 };
3893
3894 /**
3895  * camel_header_format_date:
3896  * @date: time_t date representation
3897  * @tz_offset: Timezone offset
3898  *
3899  * Allocates a string buffer containing the rfc822 formatted date
3900  * string represented by @time and @tz_offset.
3901  *
3902  * Returns: a valid string representation of the date.
3903  **/
3904 gchar *
3905 camel_header_format_date (time_t date,
3906                           gint tz_offset)
3907 {
3908         struct tm tm;
3909
3910         d (printf ("offset = %d\n", tz_offset));
3911
3912         d (printf ("converting date %s", ctime (&date)));
3913
3914         date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
3915
3916         d (printf ("converting date %s", ctime (&date)));
3917
3918         gmtime_r (&date, &tm);
3919
3920         return g_strdup_printf (
3921                 "%s, %02d %s %04d %02d:%02d:%02d %+05d",
3922                 tm_days[tm.tm_wday],
3923                 tm.tm_mday,
3924                 tm_months[tm.tm_mon],
3925                 tm.tm_year + 1900,
3926                 tm.tm_hour,
3927                 tm.tm_min,
3928                 tm.tm_sec,
3929                 tz_offset);
3930 }
3931
3932 /* This is where it gets ugly... */
3933
/* One token of a date string, as produced by datetok().
 *
 *   next:  following token, NULL for the last one
 *   mask:  OR of the camel_datetok_table entries of every character in the
 *          token (DATE_TOKEN_* bits)
 *   start: first character of the token; points into the string given to
 *          datetok(), so it is neither a copy nor NUL-terminated at the
 *          token's end
 *   len:   number of characters in the token
 */
3934 struct _date_token {
3935         struct _date_token *next;
3936         guchar mask;
3937         const gchar *start;
3938         gsize len;
3939 };
3940
3941 static struct _date_token *
3942 datetok (const gchar *date)
3943 {
3944         struct _date_token *tokens = NULL, *token, *tail = (struct _date_token *) &tokens;
3945         const gchar *start, *end;
3946         guchar mask;
3947
3948         start = date;
3949         while (*start) {
3950                 /* kill leading whitespace */
3951                 while (*start == ' ' || *start == '\t')
3952                         start++;
3953
3954                 if (*start == '\0')
3955                         break;
3956
3957                 mask = camel_datetok_table[(guchar) *start];
3958
3959                 /* find the end of this token */
3960                 end = start + 1;
3961                 while (*end && !strchr ("-/,\t\r\n ", *end))
3962                         mask |= camel_datetok_table[(guchar) *end++];
3963
3964                 if (end != start) {
3965                         token = g_malloc (sizeof (struct _date_token));
3966                         token->next = NULL;
3967                         token->start = start;
3968                         token->len = end - start;
3969                         token->mask = mask;
3970
3971                         tail->next = token;
3972                         tail = token;
3973                 }
3974
3975                 if (*end)
3976                         start = end + 1;
3977                 else
3978                         break;
3979         }
3980
3981         return tokens;
3982 }
3983
3984 static gint
3985 decode_int (const gchar *in,
3986             gsize inlen)
3987 {
3988         register const gchar *inptr;
3989         gint sign = 1, val = 0;
3990         const gchar *inend;
3991
3992         inptr = in;
3993         inend = in + inlen;
3994
3995         if (*inptr == '-') {
3996                 sign = -1;
3997                 inptr++;
3998         } else if (*inptr == '+')
3999                 inptr++;
4000
4001         for (; inptr < inend; inptr++) {
4002                 if (!(*inptr >= '0' && *inptr <= '9'))
4003                         return -1;
4004                 else
4005                         val = (val * 10) + (*inptr - '0');
4006         }
4007
4008         val *= sign;
4009
4010         return val;
4011 }
4012
/* Compiled out (#if 0).  Maps a 1-based @month to its number of days, using
 * g_date_is_leap_year (@year) for February; any other month value gives 0. */
4013 #if 0
4014 static gint
4015 get_days_in_month (gint month,
4016                    gint year)
4017 {
4018         switch (month) {
4019         case 1:
4020         case 3:
4021         case 5:
4022         case 7:
4023         case 8:
4024         case 10:
4025         case 12:
4026                 return 31;
4027         case 4:
4028         case 6:
4029         case 9:
4030         case 11:
4031                 return 30;
4032         case 2:
4033                 if (g_date_is_leap_year (year))
4034                         return 29;
4035                 else
4036                         return 28;
4037         default:
4038                 return 0;
4039         }
4040 }
4041 #endif
4042
4043 static gint
4044 get_wday (const gchar *in,
4045           gsize inlen)
4046 {
4047         gint wday;
4048
4049         g_return_val_if_fail (in != NULL, -1);
4050
4051         if (inlen < 3)
4052                 return -1;
4053
4054         for (wday = 0; wday < 7; wday++) {
4055                 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
4056                         return wday;
4057         }
4058
4059         return -1;  /* unknown week day */
4060 }
4061
4062 static gint
4063 get_mday (const gchar *in,
4064           gsize inlen)
4065 {
4066         gint mday;
4067
4068         g_return_val_if_fail (in != NULL, -1);
4069
4070         mday = decode_int (in, inlen);
4071
4072         if (mday < 0 || mday > 31)
4073                 mday = -1;
4074
4075         return mday;
4076 }
4077
4078 static gint
4079 get_month (const gchar *in,
4080            gsize inlen)
4081 {
4082         gint i;
4083
4084         g_return_val_if_fail (in != NULL, -1);
4085
4086         if (inlen < 3)
4087                 return -1;
4088
4089         for (i = 0; i < 12; i++) {
4090                 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
4091                         return i;
4092         }
4093
4094         return -1;  /* unknown month */
4095 }
4096
4097 static gint
4098 get_year (const gchar *in,
4099           gsize inlen)
4100 {
4101         gint year;
4102
4103         g_return_val_if_fail (in != NULL, -1);
4104
4105         if ((year = decode_int (in, inlen)) == -1)
4106                 return -1;
4107
4108         if (year < 100)
4109                 year += (year < 70) ? 2000 : 1900;
4110
4111         if (year < 1969)
4112                 return -1;
4113
4114         return year;
4115 }
4116
4117 static gboolean
4118 get_time (const gchar *in,
4119           gsize inlen,
4120           gint *hour,
4121           gint *min,
4122           gint *sec)
4123 {
4124         register const gchar *inptr;
4125         gint *val, colons = 0;
4126         const gchar *inend;
4127
4128         *hour = *min = *sec = 0;
4129
4130         inend = in + inlen;
4131         val = hour;
4132         for (inptr = in; inptr < inend; inptr++) {
4133                 if (*inptr == ':') {
4134                         colons++;
4135                         switch (colons) {
4136                         case 1:
4137                                 val = min;
4138                                 break;
4139                         case 2:
4140                                 val = sec;
4141                                 break;
4142                         default:
4143                                 return FALSE;
4144                         }
4145                 } else if (!(*inptr >= '0' && *inptr <= '9'))
4146                         return FALSE;
4147                 else
4148                         *val = (*val * 10) + (*inptr - '0');
4149         }
4150
4151         return TRUE;
4152 }
4153
4154 static gint
4155 get_tzone (struct _date_token **token)
4156 {
4157         const gchar *inptr, *inend;
4158         gsize inlen;
4159         gint i, t;
4160
4161         for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
4162                 inptr = (*token)->start;
4163                 inlen = (*token)->len;
4164                 inend = inptr + inlen;
4165
4166                 if (*inptr == '+' || *inptr == '-') {
4167                         return decode_int (inptr, inlen);
4168                 } else {
4169                         if (*inptr == '(') {
4170                                 inptr++;
4171                                 if (*(inend - 1) == ')')
4172                                         inlen -= 2;
4173                                 else
4174                                         inlen--;
4175                         }
4176
4177                         for (t = 0; t < 15; t++) {
4178                                 gsize len = strlen (tz_offsets[t].name);
4179
4180                                 if (len != inlen)
4181                                         continue;
4182
4183                                 if (!strncmp (inptr, tz_offsets[t].name, len))
4184                                         return tz_offsets[t].offset;
4185                         }
4186                 }
4187         }
4188
4189         return -1;
4190 }
4191
4192 static time_t
4193 parse_rfc822_date (struct _date_token *tokens,
4194                    gint *tzone)
4195 {
4196         gint hour, min, sec, offset, n;
4197         struct _date_token *token;
4198         struct tm tm;
4199         time_t t;
4200
4201         g_return_val_if_fail (tokens != NULL, (time_t) 0);
4202
4203         token = tokens;
4204
4205         memset ((gpointer) &tm, 0, sizeof (struct tm));
4206
4207         if ((n = get_wday (token->start, token->len)) != -1) {
4208                 /* not all dates may have this... */
4209                 tm.tm_wday = n;
4210                 token = token->next;
4211         }
4212
4213         /* get the mday */
4214         if (!token || (n = get_mday (token->start, token->len)) == -1)
4215                 return (time_t) 0;
4216
4217         tm.tm_mday = n;
4218         token = token->next;
4219
4220         /* get the month */
4221         if (!token || (n = get_month (token->start, token->len)) == -1)
4222                 return (time_t) 0;
4223
4224         tm.tm_mon = n;
4225         token = token->next;
4226
4227         /* get the year */
4228         if (!token || (n = get_year (token->start, token->len)) == -1)
4229                 return (time_t) 0;
4230
4231         tm.tm_year = n - 1900;
4232         token = token->next;
4233
4234         /* get the hour/min/sec */
4235         if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
4236                 return (time_t) 0;
4237
4238         tm.tm_hour = hour;
4239         tm.tm_min = min;
4240         tm.tm_sec = sec;
4241         token = token->next;
4242
4243         if (token && token->start && (
4244             g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4245             g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4246                 /* not a valid RFC 822 time representation */
4247                 return 0;
4248         }
4249
4250         /* get the timezone */
4251         if (!token || (n = get_tzone (&token)) == -1) {
4252                 /* I guess we assume tz is GMT? */
4253                 offset = 0;
4254         } else {
4255                 offset = n;
4256         }
4257
4258         t = camel_mktime_utc (&tm);
4259
4260         /* t is now GMT of the time we want, but not offset by the timezone ... */
4261
4262         /* this should convert the time to the GMT equiv time */
4263         t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4264
4265         if (tzone)
4266                 *tzone = offset;
4267
4268         return t;
4269 }
4270
/* Token classification helpers for a struct _date_token pointer @t.  The
 * NON_* bits are set when the token contains a character outside the class,
 * so a token qualifies when the corresponding bit is clear.  is_time()
 * additionally requires a ':' in the token and is_tzone_numeric() requires
 * a sign character. */
4271 #define date_token_mask(t)  (((struct _date_token *) t)->mask)
4272 #define is_numeric(t)       ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
4273 #define is_weekday(t)       ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
4274 #define is_month(t)         ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
4275 #define is_time(t)          (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
4276 #define is_tzone_alpha(t)   ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
4277 #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
4278 #define is_tzone(t)         (is_tzone_alpha (t) || is_tzone_numeric (t))
4279
4280 static time_t
4281 parse_broken_date (struct _date_token *tokens,
4282                    gint *tzone)
4283 {
4284         gboolean got_wday, got_month, got_tzone, is_pm;
4285         gint hour, min, sec, offset, n;
4286         struct _date_token *token;
4287         struct tm tm;
4288         time_t t;
4289
4290         memset ((gpointer) &tm, 0, sizeof (struct tm));
4291         got_wday = got_month = got_tzone = FALSE;
4292         is_pm = FALSE;
4293         offset = 0;
4294
4295         token = tokens;
4296         while (token) {
4297                 if (is_weekday (token) && !got_wday) {
4298                         if ((n = get_wday (token->start, token->len)) != -1) {
4299                                 d (printf ("weekday; "));
4300                                 got_wday = TRUE;
4301                                 tm.tm_wday = n;
4302                                 goto next;
4303                         }
4304                 }
4305
4306                 if (is_month (token) && !got_month) {
4307                         if ((n = get_month (token->start, token->len)) != -1) {
4308                                 d (printf ("month; "));
4309                                 got_month = TRUE;
4310                                 tm.tm_mon = n;
4311                                 goto next;
4312                         }
4313                 }
4314
4315                 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
4316                         if (get_time (token->start, token->len, &hour, &min, &sec)) {
4317                                 d (printf ("time; "));
4318                                 tm.tm_hour = hour;
4319                                 tm.tm_min = min;
4320                                 tm.tm_sec = sec;
4321                                 goto next;
4322                         }
4323                 }
4324
4325                 if (!got_tzone && token->start && (
4326                     g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4327                     g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4328                         is_pm = g_ascii_strncasecmp (token->start, "PM", 2) == 0;
4329
4330                         goto next;
4331                 }
4332
4333                 if (is_tzone (token) && !got_tzone) {
4334                         struct _date_token *t = token;
4335
4336                         if ((n = get_tzone (&t)) != -1) {
4337                                 d (printf ("tzone; "));
4338                                 got_tzone = TRUE;
4339                                 offset = n;
4340                                 goto next;
4341                         }
4342                 }
4343
4344                 if (is_numeric (token)) {
4345                         if (token->len == 4 && !tm.tm_year) {
4346                                 if ((n = get_year (token->start, token->len)) != -1) {
4347                                         d (printf ("year; "));
4348                                         tm.tm_year = n - 1900;
4349                                         goto next;
4350                                 }
4351                         } else {
4352                                 /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
4353                                 if (!got_month && token->next && is_numeric (token->next)) {
4354                                         if ((n = decode_int (token->start, token->len)) > 12) {
4355                                                 goto mday;
4356                                         } else if (n > 0) {
4357                                                 d (printf ("mon; "));
4358                                                 got_month = TRUE;
4359                                                 tm.tm_mon = n - 1;
4360                                         }
4361                                         goto next;
4362                                 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
4363                                 mday:
4364                                         d (printf ("mday; "));
4365                                         tm.tm_mday = n;
4366                                         goto next;
4367                                 } else if (!tm.tm_year) {
4368                                         if ((n = get_year (token->start, token->len)) != -1) {
4369                                                 d (printf ("2-digit year; "));
4370                                                 tm.tm_year = n - 1900;
4371                                         }
4372                                         goto next;
4373                                 }
4374                         }
4375                 }
4376
4377                 d (printf ("???; "));
4378
4379         next:
4380
4381                 token = token->next;
4382         }
4383
4384         d (printf ("\n"));
4385
4386         t = camel_mktime_utc (&tm);
4387
4388         /* t is now GMT of the time we want, but not offset by the timezone ... */
4389
4390         /* this should convert the time to the GMT equiv time */
4391         t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4392
4393         if (is_pm)
4394                 t += 12 * 60 * 60;
4395
4396         if (tzone)
4397                 *tzone = offset;
4398
4399         return t;
4400 }
4401
4402 /**
4403  * camel_header_decode_date:
4404  * @str: input date string
4405  * @tz_offset: timezone offset
4406  *
4407  * Decodes the rfc822 date string and saves the GMT offset into
4408  * @tz_offset if non-NULL.
4409  *
4410  * Returns: the time_t representation of the date string specified by
4411  * @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
4412  * of the timezone offset will be stored.
4413  **/
4414 time_t
4415 camel_header_decode_date (const gchar *str,
4416                           gint *tz_offset)
4417 {
4418         struct _date_token *token, *tokens;
4419         time_t date;
4420
4421         if (!str || !(tokens = datetok (str))) {
4422                 if (tz_offset)
4423                         *tz_offset = 0;
4424
4425                 return (time_t) 0;
4426         }
4427
4428         if (!(date = parse_rfc822_date (tokens, tz_offset)))
4429                 date = parse_broken_date (tokens, tz_offset);
4430
4431         /* cleanup */
4432         while (tokens) {
4433                 token = tokens;
4434                 tokens = tokens->next;
4435                 g_free (token);
4436         }
4437
4438         return date;
4439 }
4440
4441 gchar *
4442 camel_header_location_decode (const gchar *in)
4443 {
4444         gint quote = 0;
4445         GString *out = g_string_new ("");
4446         gchar c, *res;
4447
4448         /* Sigh. RFC2557 says:
4449          *   content-location =   "Content-Location:" [CFWS] URI [CFWS]
4450          *      where URI is restricted to the syntax for URLs as
4451          *      defined in Uniform Resource Locators [URL] until
4452          *      IETF specifies other kinds of URIs.
4453          *
4454          * But Netscape puts quotes around the URI when sending web
4455          * pages.
4456          *
4457          * Which is required as defined in rfc2017 [3.1].  Although
4458          * outlook doesn't do this.
4459          *
4460          * Since we get headers already unfolded, we need just drop
4461          * all whitespace.  URL's cannot contain whitespace or quoted
4462          * characters, even when included in quotes.
4463          */
4464
4465         header_decode_lwsp (&in);
4466         if (*in == '"') {
4467                 in++;
4468                 quote = 1;
4469         }
4470
4471         while ((c = *in++)) {
4472                 if (quote && c == '"')
4473                         break;
4474                 if (!camel_mime_is_lwsp (c))
4475                         g_string_append_c (out, c);
4476         }
4477
4478         res = g_strdup (out->str);
4479         g_string_free (out, TRUE);
4480
4481         return res;
4482 }
4483
4484 /* extra rfc checks */
4485 #define CHECKS
4486
4487 #ifdef CHECKS
4488 static void
4489 check_header (struct _camel_header_raw *header)
4490 {
4491         guchar *cp;
4492
4493         cp = (guchar *) header->value;
4494         while (cp && *cp) {
4495                 if (!isascii (*cp)) {
4496                         w (g_warning ("Appending header violates rfc: %s: %s", header->name, header->value));
4497                         return;
4498                 }
4499                 cp++;
4500         }
4501 }
4502 #endif
4503
4504 void
4505 camel_header_raw_append_parse (struct _camel_header_raw **list,
4506                                const gchar *header,
4507                                gint offset)
4508 {
4509         register const gchar *in;
4510         gsize fieldlen;
4511         gchar *name;
4512
4513         in = header;
4514         while (camel_mime_is_fieldname (*in) || *in == ':')
4515                 in++;
4516         fieldlen = in - header - 1;
4517         while (camel_mime_is_lwsp (*in))
4518                 in++;
4519         if (fieldlen == 0 || header[fieldlen] != ':') {
4520                 printf ("Invalid header line: '%s'\n", header);
4521                 return;
4522         }
4523         name = g_alloca (fieldlen + 1);
4524         memcpy (name, header, fieldlen);
4525         name[fieldlen] = 0;
4526
4527         camel_header_raw_append (list, name, in, offset);
4528 }
4529
4530 void
4531 camel_header_raw_append (struct _camel_header_raw **list,
4532                          const gchar *name,
4533                          const gchar *value,
4534                          gint offset)
4535 {
4536         struct _camel_header_raw *l, *n;
4537
4538         d (printf ("Header: %s: %s\n", name, value));
4539
4540         n = g_malloc (sizeof (*n));
4541         n->next = NULL;
4542         n->name = g_strdup (name);
4543         n->value = g_strdup (value);
4544         n->offset = offset;
4545 #ifdef CHECKS
4546         check_header (n);
4547 #endif
4548         l = (struct _camel_header_raw *) list;
4549         while (l->next) {
4550                 l = l->next;
4551         }
4552         l->next = n;
4553
4554         /* debug */
4555 #if 0
4556         if (!g_ascii_strcasecmp (name, "To")) {
4557                 printf ("- Decoding To\n");
4558                 camel_header_to_decode (value);
4559         } else if (!g_ascii_strcasecmp (name, "Content-type")) {
4560                 printf ("- Decoding content-type\n");
4561                 camel_content_type_dump (camel_content_type_decode (value));
4562         } else if (!g_ascii_strcasecmp (name, "MIME-Version")) {
4563                 printf ("- Decoding mime version\n");
4564                 camel_header_mime_decode (value);
4565         }
4566 #endif
4567 }
4568
4569 static struct _camel_header_raw *
4570 header_raw_find_node (struct _camel_header_raw **list,
4571                       const gchar *name)
4572 {
4573         struct _camel_header_raw *l;
4574
4575         l = *list;
4576         while (l) {
4577                 if (!g_ascii_strcasecmp (l->name, name))
4578                         break;
4579                 l = l->next;
4580         }
4581         return l;
4582 }
4583
4584 const gchar *
4585 camel_header_raw_find (struct _camel_header_raw **list,
4586                        const gchar *name,
4587                        gint *offset)
4588 {
4589         struct _camel_header_raw *l;
4590
4591         l = header_raw_find_node (list, name);
4592         if (l) {
4593                 if (offset)
4594                         *offset = l->offset;
4595                 return l->value;
4596         } else
4597                 return NULL;
4598 }
4599
4600 const gchar *
4601 camel_header_raw_find_next (struct _camel_header_raw **list,
4602                             const gchar *name,
4603                             gint *offset,
4604                             const gchar *last)
4605 {
4606         struct _camel_header_raw *l;
4607
4608         if (last == NULL || name == NULL)
4609                 return NULL;
4610
4611         l = *list;
4612         while (l && l->value != last)
4613                 l = l->next;
4614         return camel_header_raw_find (&l, name, offset);
4615 }
4616
4617 static void
4618 header_raw_free (struct _camel_header_raw *l)
4619 {
4620         g_free (l->name);
4621         g_free (l->value);
4622         g_free (l);
4623 }
4624
4625 void
4626 camel_header_raw_remove (struct _camel_header_raw **list,
4627                          const gchar *name)
4628 {
4629         struct _camel_header_raw *l, *p;
4630
4631         /* the next pointer is at the head of the structure, so this is safe */
4632         p = (struct _camel_header_raw *) list;
4633         l = *list;
4634         while (l) {
4635                 if (!g_ascii_strcasecmp (l->name, name)) {
4636                         p->next = l->next;
4637                         header_raw_free (l);
4638                         l = p->next;
4639                 } else {
4640                         p = l;
4641                         l = l->next;
4642                 }
4643         }
4644 }
4645
4646 void
4647 camel_header_raw_replace (struct _camel_header_raw **list,
4648                           const gchar *name,
4649                           const gchar *value,
4650                           gint offset)
4651 {
4652         camel_header_raw_remove (list, name);
4653         camel_header_raw_append (list, name, value, offset);
4654 }
4655
4656 void
4657 camel_header_raw_clear (struct _camel_header_raw **list)
4658 {
4659         struct _camel_header_raw *l, *n;
4660         l = *list;
4661         while (l) {
4662                 n = l->next;
4663                 header_raw_free (l);
4664                 l = n;
4665         }
4666         *list = NULL;
4667 }
4668
/*
 * Builds a new message id of the form
 * "<time>.<pid>.<counter>.camel@<hostname>" and returns it as a newly
 * allocated string.
 *
 * The hostname is resolved once and cached: the canonical name from
 * camel_getaddrinfo() if available, else the gethostname() result, else
 * "localhost.localdomain". Both the cache and the counter are static and
 * are only touched while count_lock is held.
 */
gchar *
camel_header_msgid_generate (void)
{
        static GMutex count_lock;
        static gchar *cached_hostname = NULL;
        static gint count = 0;
        gchar *msgid;

        g_mutex_lock (&count_lock);

        /* Initialise the cache under the lock so that concurrent first
         * callers neither race on the pointer nor leak a duplicate. */
        if (!cached_hostname) {
                gchar host[MAXHOSTNAMELEN];
                const gchar *name = "localhost.localdomain";
                struct addrinfo *ai = NULL, hints = { 0 };

                if (gethostname (host, sizeof (host)) == 0) {
                        /* a truncated name is not guaranteed to be terminated */
                        host[sizeof (host) - 1] = '\0';

                        if (*host) {
                                hints.ai_flags = AI_CANONNAME;
                                ai = camel_getaddrinfo (
                                        host, NULL, &hints, NULL, NULL);
                                if (ai && ai->ai_canonname)
                                        name = ai->ai_canonname;
                                else
                                        name = host;
                        }
                }

                cached_hostname = g_strdup (name);

                /* name may point into ai, so free it only after the copy */
                if (ai)
                        camel_freeaddrinfo (ai);
        }

        msgid = g_strdup_printf ("%d.%d.%d.camel@%s", (gint) time (NULL), getpid (), count++, cached_hostname);

        g_mutex_unlock (&count_lock);

        return msgid;
}
4708
/*
 * Headers that can reveal which mailing list a message came through.
 * Entries are tried in table order by camel_header_raw_check_mailing_list()
 * and the first one that matches wins. Each pattern is compiled once by
 * mailing_list_init() as a case-insensitive POSIX extended regex; capture
 * group 1 and (if non-empty) capture group 2 are joined with '@' to form
 * the result. The comments above each entry show sample header lines.
 */
static struct {
        const gchar *name;      /* header to look up */
        const gchar *pattern;   /* regex source, two capture groups */
        regex_t regex;          /* compiled form, filled in by mailing_list_init() */
} mail_list_magic[] = {
        /* List-Post: <mailto:gnome-hackers@gnome.org> */
        /* List-Post: <mailto:gnome-hackers> */
        { "List-Post", "[ \t]*<mailto:([^@>]+)@?([^ \n\t\r>]*)" },
        /* List-Id: GNOME stuff <gnome-hackers.gnome.org> */
        /* List-Id: <gnome-hackers.gnome.org> */
        /* List-Id: <gnome-hackers> */
        /* This old one wasn't very useful: { "List-Id", " *([^<]+)" },*/
        { "List-Id", "[^<]*<([^\\.>]+)\\.?([^ \n\t\r>]*)" },
        /* Mailing-List: list gnome-hackers@gnome.org; contact gnome-hackers-owner@gnome.org */
        { "Mailing-List", "[ \t]*list ([^@]+)@?([^ \n\t\r>;]*)" },
        /* Originator: gnome-hackers@gnome.org */
        { "Originator", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
        /* X-Mailing-List: <gnome-hackers@gnome.org> archive/latest/100 */
        /* X-Mailing-List: gnome-hackers@gnome.org */
        /* X-Mailing-List: gnome-hackers */
        /* X-Mailing-List: <gnome-hackers> */
        { "X-Mailing-List", "[ \t]*<?([^@>]+)@?([^ \n\t\r>]*)" },
        /* X-Loop: gnome-hackers@gnome.org */
        { "X-Loop", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
        /* X-List: gnome-hackers */
        /* X-List: gnome-hackers@gnome.org */
        { "X-List", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
        /* Sender: owner-gnome-hackers@gnome.org */
        /* Sender: owner-gnome-hackers */
        { "Sender", "[ \t]*owner-([^@]+)@?([^ @\n\t\r>]*)" },
        /* Sender: gnome-hackers-owner@gnome.org */
        /* Sender: gnome-hackers-owner */
        { "Sender", "[ \t]*([^@]+)-owner@?([^ @\n\t\r>]*)" },
        /* Delivered-To: mailing list gnome-hackers@gnome.org */
        /* Delivered-To: mailing list gnome-hackers */
        { "Delivered-To", "[ \t]*mailing list ([^@]+)@?([^ \n\t\r>]*)" },
        /* Return-Path: owner-gnome-hackers@gnome.org */
        /* Return-Path: <owner-gnome-hackers@gnome.org> */
        /* Return-Path: owner-gnome-hackers */
        /* Return-Path: <owner-gnome-hackers> */
        { "Return-Path", "[ \t]*<?owner-([^@>]+)@?([^ \n\t\r>]*)" },
        /* X-BeenThere: gnome-hackers@gnome.org */
        /* X-BeenThere: gnome-hackers */
        { "X-BeenThere", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
        /* List-Unsubscribe:  <mailto:gnome-hackers-unsubscribe@gnome.org> */
        { "List-Unsubscribe", "<mailto:(.+)-unsubscribe@([^ \n\t\r>]*)" },
};
4756
4757 static gpointer
4758 mailing_list_init (gpointer param)
4759 {
4760         gint i, errcode, failed = 0;
4761
4762         /* precompile regex's for speed at runtime */
4763         for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4764                 errcode = regcomp (&mail_list_magic[i].regex, mail_list_magic[i].pattern, REG_EXTENDED | REG_ICASE);
4765                 if (errcode != 0) {
4766                         gchar *errstr;
4767                         gsize len;
4768
4769                         len = regerror (errcode, &mail_list_magic[i].regex, NULL, 0);
4770                         errstr = g_malloc0 (len + 1);
4771                         regerror (errcode, &mail_list_magic[i].regex, errstr, len);
4772
4773                         g_warning ("Internal error, compiling regex failed: %s: %s", mail_list_magic[i].pattern, errstr);
4774                         g_free (errstr);
4775                         failed++;
4776                 }
4777         }
4778
4779         g_assert (failed == 0);
4780
4781         return NULL;
4782 }
4783
4784 gchar *
4785 camel_header_raw_check_mailing_list (struct _camel_header_raw **list)
4786 {
4787         static GOnce once = G_ONCE_INIT;
4788         const gchar *v;
4789         regmatch_t match[3];
4790         gint i, j;
4791
4792         g_once (&once, mailing_list_init, NULL);
4793
4794         for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4795                 v = camel_header_raw_find (list, mail_list_magic[i].name, NULL);
4796                 for (j = 0; j < 3; j++) {
4797                         match[j].rm_so = -1;
4798                         match[j].rm_eo = -1;
4799                 }
4800                 if (v != NULL && regexec (&mail_list_magic[i].regex, v, 3, match, 0) == 0 && match[1].rm_so != -1) {
4801                         gint len1, len2;
4802                         gchar *mlist;
4803
4804                         len1 = match[1].rm_eo - match[1].rm_so;
4805                         len2 = match[2].rm_eo - match[2].rm_so;
4806
4807                         mlist = g_malloc (len1 + len2 + 2);
4808                         memcpy (mlist, v + match[1].rm_so, len1);
4809                         if (len2) {
4810                                 mlist[len1] = '@';
4811                                 memcpy (mlist + len1 + 1, v + match[2].rm_so, len2);
4812                                 mlist[len1 + len2 + 1] = '\0';
4813                         } else {
4814                                 mlist[len1] = '\0';
4815                         }
4816
4817                         return mlist;
4818                 }
4819         }
4820
4821         return NULL;
4822 }
4823
4824 /* ok, here's the address stuff, what a mess ... */
4825 struct _camel_header_address *
4826 camel_header_address_new (void)
4827 {
4828         struct _camel_header_address *h;
4829         h = g_malloc0 (sizeof (*h));
4830         h->type = CAMEL_HEADER_ADDRESS_NONE;
4831         h->refcount = 1;
4832         return h;
4833 }
4834
4835 struct _camel_header_address *
4836 camel_header_address_new_name (const gchar *name,
4837                                const gchar *addr)
4838 {
4839         struct _camel_header_address *h;
4840         h = camel_header_address_new ();
4841         h->type = CAMEL_HEADER_ADDRESS_NAME;
4842         h->name = g_strdup (name);
4843         h->v.addr = g_strdup (addr);
4844         return h;
4845 }
4846
4847 struct _camel_header_address *
4848 camel_header_address_new_group (const gchar *name)
4849 {
4850         struct _camel_header_address *h;
4851
4852         h = camel_header_address_new ();
4853         h->type = CAMEL_HEADER_ADDRESS_GROUP;
4854         h->name = g_strdup (name);
4855         return h;
4856 }
4857
4858 void
4859 camel_header_address_ref (struct _camel_header_address *h)
4860 {
4861         if (h)
4862                 h->refcount++;
4863 }
4864
4865 void
4866 camel_header_address_unref (struct _camel_header_address *h)
4867 {
4868         if (h) {
4869                 if (h->refcount <= 1) {
4870                         if (h->type == CAMEL_HEADER_ADDRESS_GROUP) {
4871                                 camel_header_address_list_clear (&h->v.members);
4872                         } else if (h->type == CAMEL_HEADER_ADDRESS_NAME) {
4873                                 g_free (h->v.addr);
4874                         }
4875                         g_free (h->name);
4876                         g_free (h);
4877                 } else {
4878                         h->refcount--;
4879                 }
4880         }
4881 }
4882
4883 void
4884 camel_header_address_set_name (struct _camel_header_address *h,
4885                                const gchar *name)
4886 {
4887         if (h) {
4888                 g_free (h->name);
4889                 h->name = g_strdup (name);
4890         }
4891 }
4892
4893 void
4894 camel_header_address_set_addr (struct _camel_header_address *h,
4895                                const gchar *addr)
4896 {
4897         if (h) {
4898                 if (h->type == CAMEL_HEADER_ADDRESS_NAME
4899                     || h->type == CAMEL_HEADER_ADDRESS_NONE) {
4900                         h->type = CAMEL_HEADER_ADDRESS_NAME;
4901                         g_free (h->v.addr);
4902                         h->v.addr = g_strdup (addr);
4903                 } else {
4904                         g_warning ("Trying to set the address on a group");
4905                 }
4906         }
4907 }
4908
4909 void
4910 camel_header_address_set_members (struct _camel_header_address *h,
4911                                   struct _camel_header_address *group)
4912 {
4913         if (h) {
4914                 if (h->type == CAMEL_HEADER_ADDRESS_GROUP
4915                     || h->type == CAMEL_HEADER_ADDRESS_NONE) {
4916                         h->type = CAMEL_HEADER_ADDRESS_GROUP;
4917                         camel_header_address_list_clear (&h->v.members);
4918                         /* should this ref them? */
4919                         h->v.members = group;
4920                 } else {
4921                         g_warning ("Trying to set the members on a name, not group");
4922                 }
4923         }
4924 }
4925
4926 void
4927 camel_header_address_add_member (struct _camel_header_address *h,
4928                                  struct _camel_header_address *member)
4929 {
4930         if (h) {
4931                 if (h->type == CAMEL_HEADER_ADDRESS_GROUP
4932                     || h->type == CAMEL_HEADER_ADDRESS_NONE) {
4933                         h->type = CAMEL_HEADER_ADDRESS_GROUP;
4934                         camel_header_address_list_append (&h->v.members, member);
4935                 }
4936         }
4937 }
4938
4939 void
4940 camel_header_address_list_append_list (struct _camel_header_address **l,
4941                                        struct _camel_header_address **h)
4942 {
4943         if (l) {
4944                 struct _camel_header_address *n = (struct _camel_header_address *) l;
4945
4946                 while (n->next)
4947                         n = n->next;
4948                 n->next = *h;
4949         }
4950 }
4951
4952 void
4953 camel_header_address_list_append (struct _camel_header_address **l,
4954                                   struct _camel_header_address *h)
4955 {
4956         if (h) {
4957                 camel_header_address_list_append_list (l, &h);
4958                 h->next = NULL;
4959         }
4960 }
4961
4962 void
4963 camel_header_address_list_clear (struct _camel_header_address **l)
4964 {
4965         struct _camel_header_address *a, *n;
4966         a = *l;
4967         while (a) {
4968                 n = a->next;
4969                 camel_header_address_unref (a);
4970                 a = n;
4971         }
4972         *l = NULL;
4973 }
4974
4975 /* if encode is true, then the result is suitable for mailing, otherwise
4976  * the result is suitable for display only (and may not even be re-parsable) */
4977 static void
4978 header_address_list_encode_append (GString *out,
4979                                    gint encode,
4980                                    struct _camel_header_address *a)
4981 {
4982         gchar *text;
4983
4984         while (a) {
4985                 switch (a->type) {
4986                 case CAMEL_HEADER_ADDRESS_NAME:
4987                         if (encode)
4988                                 text = camel_header_encode_phrase ((guchar *) a->name);
4989                         else
4990                                 text = a->name;
4991                         if (text && *text)
4992                                 g_string_append_printf (out, "%s <%s>", text, a->v.addr);
4993                         else
4994                                 g_string_append (out, a->v.addr);
4995                         if (encode)
4996                                 g_free (text);
4997                         break;
4998                 case CAMEL_HEADER_ADDRESS_GROUP:
4999                         if (encode)
5000                                 text = camel_header_encode_phrase ((guchar *) a->name);
5001                         else
5002                                 text = a->name;
5003                         g_string_append_printf (out, "%s: ", text);
5004                         header_address_list_encode_append (out, encode, a->v.members);
5005                         g_string_append_printf (out, ";");
5006                         if (encode)
5007                                 g_free (text);
5008                         break;
5009                 default:
5010                         g_warning ("Invalid address type");
5011                         break;
5012                 }
5013                 a = a->next;
5014                 if (a)
5015                         g_string_append (out, ", ");
5016         }
5017 }
5018
5019 gchar *
5020 camel_header_address_list_encode (struct _camel_header_address *a)
5021 {
5022         GString *out;
5023         gchar *ret;
5024
5025         if (a == NULL)
5026                 return NULL;
5027
5028         out = g_string_new ("");
5029         header_address_list_encode_append (out, TRUE, a);
5030         ret = out->str;
5031         g_string_free (out, FALSE);
5032
5033         return ret;
5034 }
5035
5036 gchar *
5037 camel_header_address_list_format (struct _camel_header_address *a)
5038 {
5039         GString *out;
5040         gchar *ret;
5041
5042         if (a == NULL)
5043                 return NULL;
5044
5045         out = g_string_new ("");
5046
5047         header_address_list_encode_append (out, FALSE, a);
5048         ret = out->str;
5049         g_string_free (out, FALSE);
5050
5051         return ret;
5052 }
5053
5054 gchar *
5055 camel_header_address_fold (const gchar *in,
5056                            gsize headerlen)
5057 {
5058         gsize len, outlen;
5059         const gchar *inptr = in, *space, *p, *n;
5060         GString *out;
5061         gchar *ret;
5062         gint i, needunfold = FALSE;
5063
5064         if (in == NULL)
5065                 return NULL;
5066
5067         /* first, check to see if we even need to fold */
5068         len = headerlen + 2;
5069         p = in;
5070         while (*p) {
5071                 n = strchr (p, '\n');
5072                 if (n == NULL) {
5073                         len += strlen (p);
5074                         break;
5075                 }
5076
5077                 needunfold = TRUE;
5078                 len += n - p;
5079
5080                 if (len >= CAMEL_FOLD_SIZE)
5081                         break;
5082                 len = 0;
5083                 p = n + 1;
5084         }
5085         if (len < CAMEL_FOLD_SIZE)
5086                 return g_strdup (in);
5087
5088         /* we need to fold, so first unfold (if we need to), then process */
5089         if (needunfold)
5090                 inptr = in = camel_header_unfold (in);
5091
5092         out = g_string_new ("");
5093         outlen = headerlen + 2;
5094         while (*inptr) {
5095                 space = strchr (inptr, ' ');
5096                 if (space) {
5097                         len = space - inptr + 1;
5098                 } else {
5099                         len = strlen (inptr);
5100                 }
5101
5102                 d (printf ("next word '%.*s'\n", len, inptr));
5103
5104                 if (outlen + len > CAMEL_FOLD_SIZE) {
5105                         d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5106                         /* strip trailing space */
5107                         if (out->len > 0 && out->str[out->len - 1] == ' ')
5108                                 g_string_truncate (out, out->len - 1);
5109                         g_string_append (out, "\n\t");
5110                         outlen = 1;
5111                 }
5112
5113                 outlen += len;
5114                 for (i = 0; i < len; i++) {
5115                         g_string_append_c (out, inptr[i]);
5116                 }
5117
5118                 inptr += len;
5119         }
5120         ret = out->str;
5121         g_string_free (out, FALSE);
5122
5123         if (needunfold)
5124                 g_free ((gchar *) in);
5125
5126         return ret;
5127 }
5128
5129 /* simple header folding */
5130 /* will work even if the header is already folded */
5131 gchar *
5132 camel_header_fold (const gchar *in,
5133                    gsize headerlen)
5134 {
5135         gsize len, outlen, tmplen;
5136         const gchar *inptr = in, *space, *p, *n;
5137         GString *out;
5138         gchar *ret;
5139         gint needunfold = FALSE;
5140         gchar spc;
5141
5142         if (in == NULL)
5143                 return NULL;
5144
5145         /* first, check to see if we even need to fold */
5146         len = headerlen + 2;
5147         p = in;
5148         while (*p) {
5149                 n = strchr (p, '\n');
5150                 if (n == NULL) {
5151                         len += strlen (p);
5152                         break;
5153                 }
5154
5155                 needunfold = TRUE;
5156                 len += n - p;
5157
5158                 if (len >= CAMEL_FOLD_SIZE)
5159                         break;
5160                 len = 0;
5161                 p = n + 1;
5162         }
5163         if (len < CAMEL_FOLD_SIZE)
5164                 return g_strdup (in);
5165
5166         /* we need to fold, so first unfold (if we need to), then process */
5167         if (needunfold)
5168                 inptr = in = camel_header_unfold (in);
5169
5170         out = g_string_new ("");
5171         outlen = headerlen + 2;
5172         while (*inptr) {
5173                 space = inptr;
5174                 while (*space && *space != ' ' && *space != '\t')
5175                         space++;
5176
5177                 if (*space)
5178                         len = space - inptr + 1;
5179                 else
5180                         len = space - inptr;
5181
5182                 d (printf ("next word '%.*s'\n", len, inptr));
5183                 if (outlen + len > CAMEL_FOLD_SIZE) {
5184                         d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5185                         /* strip trailing space */
5186                         if (out->len > 0 && (out->str[out->len - 1] == ' ' || out->str[out->len - 1] == '\t')) {
5187                                 spc = out->str[out->len - 1];
5188                                 g_string_truncate (out, out->len - 1);
5189                                 g_string_append_c (out, '\n');
5190                                 g_string_append_c (out, spc);
5191                         } else {
5192                                 g_string_append (out, "\n\t");
5193                         }
5194
5195                         outlen = 1;
5196
5197                         /* check for very long words, just cut them up */
5198                         while (outlen + len > CAMEL_FOLD_MAX_SIZE) {
5199                                 tmplen = CAMEL_FOLD_MAX_SIZE - outlen;
5200                                 g_string_append_len (out, inptr, tmplen);
5201                                 g_string_append (out, "\n\t");
5202                                 inptr += tmplen;
5203                                 len -= tmplen;
5204                                 outlen = 1;
5205                         }
5206                 }
5207
5208                 g_string_append_len (out, inptr, len);
5209                 outlen += len;
5210                 inptr += len;
5211         }
5212         ret = out->str;
5213         g_string_free (out, FALSE);
5214
5215         if (needunfold)
5216                 g_free ((gchar *) in);
5217
5218         return ret;
5219 }
5220
5221 gchar *
5222 camel_header_unfold (const gchar *in)
5223 {
5224         const gchar *inptr = in;
5225         gchar c, *o, *out;
5226
5227         if (in == NULL)
5228                 return NULL;
5229
5230         out = g_malloc (strlen (in) + 1);
5231
5232         o = out;
5233         while ((c = *inptr++)) {
5234                 if (c == '\n') {
5235                         if (camel_mime_is_lwsp (*inptr)) {
5236                                 do {
5237                                         inptr++;
5238                                 } while (camel_mime_is_lwsp (*inptr));
5239                                 *o++ = ' ';
5240                         } else {
5241                                 *o++ = c;
5242                         }
5243                 } else {
5244                         *o++ = c;
5245                 }
5246         }
5247         *o = 0;
5248
5249         return out;
5250 }