1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 1999-2008 Novell, Inc. (www.novell.com)
5 * Authors: Michael Zucchi <notzed@ximian.com>
6 * Jeffrey Stedfast <fejj@ximian.com>
8 * This program is free software; you can redistribute it and/or
9 * modify it under the terms of version 2 of the GNU Lesser General Public
10 * License as published by the Free Software Foundation.
12 * This program is distributed in the hope that it will be useful,
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 * General Public License for more details.
17 * You should have received a copy of the GNU Lesser General Public
18 * License along with this program; if not, write to the
19 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 * Boston, MA 02110-1301, USA.
30 #include <sys/types.h>
31 #include <sys/param.h> /* for MAXHOSTNAMELEN */
40 #ifndef MAXHOSTNAMELEN
41 #define MAXHOSTNAMELEN 1024
44 #include "camel-charset-map.h"
45 #include "camel-iconv.h"
46 #include "camel-mime-utils.h"
47 #include "camel-net-utils.h"
55 #include "camel-utf8.h"
62 /* The gmtime() in Microsoft's C library is MT-safe */
/* gmtime_r() replacement built on gmtime(): when gmtime(tp) returns non-NULL,
 * the result is copied into *tmp and the expression yields tmp; otherwise it
 * yields 0.  Note that tp is evaluated twice, because gmtime() is called a
 * second time to fetch the value that gets copied. */
63 #define gmtime_r(tp,tmp) (gmtime(tp)?(*(tmp)=*gmtime(tp),(tmp)):0)
66 /* for all non-essential warnings ... */
74 * @tm: the #tm to convert to a calendar time representation
76 * Like mktime(3), but assumes UTC instead of local timezone.
78 * Returns: the calendar time representation of @tm
83 camel_mktime_utc (struct tm *tm)
90 #if defined (HAVE_TM_GMTOFF)
92 #elif defined (HAVE_TIMEZONE)
93 if (tm->tm_isdst > 0) {
94 #if defined (HAVE_ALTZONE)
97 tt -= (timezone - 3600);
107 * camel_localtime_with_offset:
108 * @tt: the #time_t to convert
109 * @tm: the #tm to store the result in
110 * @offset: the #gint to store the offset in
112 * Converts the calendar time representation @tt to a broken-down
113 * time representation, stored in @tm, and provides the offset in
114 * seconds from UTC time, stored in @offset.
117 camel_localtime_with_offset (time_t tt,
121 localtime_r (&tt, tm);
123 #if defined (HAVE_TM_GMTOFF)
124 *offset = tm->tm_gmtoff;
125 #elif defined (HAVE_TIMEZONE)
126 if (tm->tm_isdst > 0) {
127 #if defined (HAVE_ALTZONE)
130 *offset = -(timezone - 3600);
/* Maps a value to its uuencoded character: zero becomes '`', any other
 * value is offset by the space character.  The argument is evaluated twice. */
137 #define CAMEL_UUENCODE_CHAR(c) ((c) ? (c) + ' ' : '`')
/* Inverse mapping: removes the space offset and keeps only the low six bits
 * (077 octal), so both ' ' and '`' decode to zero. */
138 #define CAMEL_UUDECODE_CHAR(c) (((c) - ' ') & 077)
140 static const guchar tohex[16] = {
141 '0', '1', '2', '3', '4', '5', '6', '7',
142 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
146 * camel_uuencode_close:
148 * @len: input stream length
149 * @out: output stream
150 * @uubuf: temporary buffer of 60 bytes
151 * @state: holds the number of bits that are stored in @save
152 * @save: leftover bits that have not yet been encoded
154 * Uuencodes a chunk of data. Call this when finished encoding data
155 * with camel_uuencode_step() to flush off the last little bit.
157 * Returns: the number of bytes encoded
160 camel_uuencode_close (guchar *in,
167 register guchar *outptr, *bufptr;
168 register guint32 saved;
169 gint uulen, uufill, i;
174 outptr += camel_uuencode_step (in, len, out, uubuf, state, save);
180 uulen = (*state >> 8) & 0xff;
182 bufptr = uubuf + ((uulen / 3) * 4);
192 /* convert 3 normal bytes into 4 uuencoded bytes */
195 b0 = (saved >> 16) & 0xff;
196 b1 = (saved >> 8) & 0xff;
199 *bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
200 *bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
201 *bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
202 *bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
211 gint cplen = ((uulen / 3) * 4);
213 *outptr++ = CAMEL_UUENCODE_CHAR ((uulen - uufill) & 0xff);
214 memcpy (outptr, uubuf, cplen);
220 *outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
230 * camel_uuencode_step:
232 * @len: input stream length
233 * @out: output stream
234 * @uubuf: temporary buffer of 60 bytes
235 * @state: holds the number of bits that are stored in @save
236 * @save: leftover bits that have not yet been encoded
238 * Uuencodes a chunk of data. Performs an 'encode step', only encodes
239 * blocks of 45 characters to the output at a time, saves left-over
240 * state in @uubuf, @state and @save (initialize to 0 on first
243 * Returns: the number of bytes encoded
246 camel_uuencode_step (guchar *in,
253 register guchar *inptr, *outptr, *bufptr;
254 guchar b0, b1, b2, *inend;
255 register guint32 saved;
267 uulen = (*state >> 8) & 0xff;
269 if ((len + uulen) < 45) {
270 /* not enough input to write a full uuencoded line */
271 bufptr = uubuf + ((uulen / 3) * 4);
276 /* copy the previous call's tmpbuf to outbuf */
277 memcpy (bufptr, uubuf, ((uulen / 3) * 4));
278 bufptr += ((uulen / 3) * 4);
283 b0 = (saved >> 8) & 0xff;
290 if ((inptr + 2) < inend) {
298 while (inptr < inend) {
299 saved = (saved << 8) | *inptr++;
304 while (inptr < inend) {
305 while (uulen < 45 && (inptr + 3) <= inend) {
312 /* convert 3 normal bytes into 4 uuencoded bytes */
313 *bufptr++ = CAMEL_UUENCODE_CHAR ((b0 >> 2) & 0x3f);
314 *bufptr++ = CAMEL_UUENCODE_CHAR (((b0 << 4) | ((b1 >> 4) & 0xf)) & 0x3f);
315 *bufptr++ = CAMEL_UUENCODE_CHAR (((b1 << 2) | ((b2 >> 6) & 0x3)) & 0x3f);
316 *bufptr++ = CAMEL_UUENCODE_CHAR (b2 & 0x3f);
322 *outptr++ = CAMEL_UUENCODE_CHAR (uulen & 0xff);
323 outptr += ((45 / 3) * 4) + 1;
328 if ((inptr + 45) <= inend) {
329 /* we have enough input to output another full line */
335 /* not enough input to continue... */
336 for (i = 0, saved = 0; inptr < inend; i++)
337 saved = (saved << 8) | *inptr++;
342 *state = ((uulen & 0xff) << 8) | (i & 0xff);
348 * camel_uudecode_step:
350 * @inlen: max length of data to decode
351 * @out: output stream
352 * @state: holds the number of bits that are stored in @save
353 * @save: leftover bits that have not yet been decoded
355 * Uudecodes a chunk of data. Performs a 'decode step' on a chunk of
356 * uuencoded data. Assumes the "begin mode filename" line has
359 * Returns: the number of bytes decoded
362 camel_uudecode_step (guchar *in,
368 register guchar *inptr, *outptr;
370 register guint32 saved;
371 gboolean last_was_eoln;
374 if (*state & CAMEL_UUDECODE_STATE_END)
379 uulen = (*state >> 8) & 0xff;
381 last_was_eoln = TRUE;
383 last_was_eoln = FALSE;
389 while (inptr < inend) {
390 if (*inptr == '\n') {
391 last_was_eoln = TRUE;
395 } else if (!uulen || last_was_eoln) {
396 /* first octet on a line is the uulen octet */
397 uulen = CAMEL_UUDECODE_CHAR (*inptr);
398 last_was_eoln = FALSE;
400 *state |= CAMEL_UUDECODE_STATE_END;
412 saved = (saved << 8) | ch;
415 /* convert 4 uuencoded bytes to 3 normal bytes */
416 guchar b0, b1, b2, b3;
419 b1 = saved >> 16 & 0xff;
420 b2 = saved >> 8 & 0xff;
424 *outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
425 *outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
426 *outptr++ = CAMEL_UUDECODE_CHAR (b2) << 6 | CAMEL_UUDECODE_CHAR (b3);
430 *outptr++ = CAMEL_UUDECODE_CHAR (b0) << 2 | CAMEL_UUDECODE_CHAR (b1) >> 4;
435 *outptr++ = CAMEL_UUDECODE_CHAR (b1) << 4 | CAMEL_UUDECODE_CHAR (b2) >> 2;
449 *state = (*state & CAMEL_UUDECODE_STATE_MASK) | ((uulen & 0xff) << 8) | (i & 0xff);
455 * camel_quoted_encode_close:
457 * @len: length of the input
458 * @out: output string
459 * @state: holds the number of bits that are stored in @save
460 * @save: leftover bits that have not yet been encoded
462 * Quoted-printable encodes a block of text. Call this when finished
463 * encoding data with camel_quoted_encode_step() to flush off
464 * the last little bit.
466 * Returns: the number of bytes encoded
469 camel_quoted_encode_close (guchar *in,
475 register guchar *outptr = out;
479 outptr += camel_quoted_encode_step (in, len, outptr, state, save);
483 /* space/tab must be encoded if it's the last character on
485 if (camel_mime_is_qpsafe (last) && last != ' ' && last != 9) {
489 *outptr++ = tohex[(last>>4) & 0xf];
490 *outptr++ = tohex[last & 0xf];
501 * camel_quoted_encode_step:
503 * @len: length of the input
504 * @out: output string
505 * @state: holds the number of bits that are stored in @save
506 * @save: leftover bits that have not yet been encoded
508 * Quoted-printable encodes a block of text. Performs an 'encode
509 * step', saves left-over state in state and save (initialise to -1 on
512 * Returns: the number of bytes encoded
515 camel_quoted_encode_step (guchar *in,
521 register guchar *inptr, *outptr, *inend;
523 register gint sofar = *save; /* keeps track of how many chars on a line */
524 register gint last = *statep; /* keeps track if last gchar to end was a space cr etc */
526 #define output_last() \
527 if (sofar + 3 > 74) { \
533 *outptr++ = tohex[(last >> 4) & 0xf]; \
534 *outptr++ = tohex[last & 0xf]; \
540 while (inptr < inend) {
547 } else if (c == '\n') {
548 if (last != -1 && last != '\r') {
556 if (camel_mime_is_qpsafe (last)) {
564 if (camel_mime_is_qpsafe (c)) {
571 /* delay output of space char */
572 if (c == ' ' || c == '\t') {
588 *outptr++ = tohex[(c >> 4) & 0xf];
589 *outptr++ = tohex[c & 0xf];
599 return (outptr - out);
603 * FIXME: this does not strip trailing spaces from lines (as it should, rfc 2045, section 6.7)
604 * Should it also canonicalise the end of line to CR LF??
606 * Note: Trailing rubbish (at the end of input), like = or =x or =\r will be lost.
610 * camel_quoted_decode_step:
612 * @len: max length of data to decode
613 * @out: output stream
614 * @savestate: holds the number of bits that are stored in @save
615 * @saveme: leftover bits that have not yet been decoded
617 * Decodes a block of quoted-printable encoded data. Performs a
618 * 'decode step' on a chunk of QP encoded data.
620 * Returns: the number of bytes decoded
623 camel_quoted_decode_step (guchar *in,
629 register guchar *inptr, *outptr;
636 d (printf ("quoted-printable, decoding text '%.*s'\n", len, in));
641 while (inptr < inend) {
644 while (inptr < inend) {
650 #ifdef CANONICALISE_EOL
651 /*else if (c=='\r') {
653 } else if (c == '\n') {
666 /* soft break ... unix end of line */
675 if (isxdigit (c) && isxdigit (save)) {
677 save = toupper (save);
678 *outptr++ = (((save>='A'?save-'A'+10:save-'0')&0x0f) << 4)
679 | ((c >= 'A' ? c - 'A' + 10 : c - '0') &0x0f);
680 } else if (c == '\n' && save == '\r') {
681 /* soft break ... canonical end of line */
683 /* just output the data */
690 #ifdef CANONICALISE_EOL
692 /* convert \r -> to \r\n, leaves \r\n alone */
715 * this is for the "Q" encoding of international words,
716 * which is slightly different than plain quoted-printable (mainly by allowing 0x20 <> _)
719 quoted_decode (const guchar *in,
723 register const guchar *inptr;
724 register guchar *outptr;
732 d (printf ("decoding text '%.*s'\n", len, in));
735 while (inptr < inend) {
738 /* silently ignore truncated data? */
739 if (inend - in >= 2) {
740 c = toupper (*inptr++);
741 c1 = toupper (*inptr++);
742 *outptr++ = (((c>='A'?c-'A'+10:c-'0')&0x0f) << 4)
743 | ((c1 >= 'A' ? c1 - 'A' + 10 : c1 - '0') &0x0f);
748 } else if (c == '_') {
760 /* rfc2047 version of quoted-printable */
761 /* safemask is the mask to apply to the camel_mime_special_table to determine what
762 * characters can safely be included without encoding */
764 quoted_encode (const guchar *in,
769 register const guchar *inptr, *inend;
776 while (inptr < inend) {
780 } else if (camel_mime_special_table[c] & safemask) {
784 *outptr++ = tohex[(c >> 4) & 0xf];
785 *outptr++ = tohex[c & 0xf];
789 d (printf ("encoding '%.*s' = '%.*s'\n", len, in, outptr - out, out));
791 return (outptr - out);
795 header_decode_lwsp (const gchar **in)
797 const gchar *inptr = *in;
800 d2 (printf ("is ws: '%s'\n", *in));
802 while ((camel_mime_is_lwsp (*inptr) || *inptr =='(') && *inptr != '\0') {
803 while (camel_mime_is_lwsp (*inptr) && *inptr != '\0') {
804 d2 (printf ("(%c)", *inptr));
809 /* check for comments */
813 while (depth && (c=*inptr) && *inptr != '\0') {
814 if (c == '\\' && inptr[1]) {
816 } else if (c == '(') {
818 } else if (c == ')') {
829 camel_iconv_strndup (iconv_t cd,
833 gsize inleft, outleft, converted = 0;
839 if (cd == (iconv_t) -1)
840 return g_strndup (string, n);
843 out = g_malloc (outlen + 4);
850 outbuf = out + converted;
851 outleft = outlen - converted;
853 converted = iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
854 if (converted == (gsize) -1) {
855 if (errno != E2BIG && errno != EINVAL)
860 * E2BIG There is not sufficient room at *outbuf.
862 * We just need to grow our outbuffer and try again.
865 converted = outbuf - out;
866 if (errno == E2BIG) {
867 outlen += inleft * 2 + 16;
868 out = g_realloc (out, outlen + 4);
869 outbuf = out + converted;
871 } while (errno == E2BIG && inleft > 0);
874 * EINVAL An incomplete multibyte sequence has been encountered
875 * in the input.
877 * We'll just have to ignore it...
880 /* flush the iconv conversion */
881 while (iconv (cd, NULL, NULL, &outbuf, &outleft) == (gsize) -1) {
886 converted = outbuf - out;
887 out = g_realloc (out, outlen + 4);
888 outleft = outlen - converted;
889 outbuf = out + converted;
892 /* Note: not all charsets can be nul-terminated with a single
893 * nul byte. UCS2, for example, needs 2 nul bytes and UCS4
894 * needs 4. I hope that 4 nul bytes is enough to terminate all
895 * multibyte charsets? */
897 /* nul-terminate the string */
898 memset (outbuf, 0, 4);
901 iconv (cd, NULL, NULL, NULL, NULL);
909 w (g_warning ("camel_iconv_strndup: %s at byte %lu", g_strerror (errno), n - inleft));
914 iconv (cd, NULL, NULL, NULL, NULL);
/* isascii() wrapper: the argument is first narrowed to guchar and then
 * widened to gint before being handed to isascii(), so a plain gchar with
 * the high bit set is presumably never passed on as a negative value
 * (assumes guchar is an unsigned type -- its typedef is not visible here). */
921 #define is_ascii(c) isascii ((gint) ((guchar) (c)))
924 decode_8bit (const gchar *text,
926 const gchar *default_charset)
928 const gchar *charsets[4] = { "UTF-8", NULL, NULL, NULL };
929 gsize inleft, outleft, outlen, rc, min, n;
930 const gchar *locale_charset, *best;
936 if (default_charset && g_ascii_strcasecmp (default_charset, "UTF-8") != 0)
937 charsets[i++] = default_charset;
939 locale_charset = camel_iconv_locale_charset ();
940 if (locale_charset && g_ascii_strcasecmp (locale_charset, "UTF-8") != 0)
941 charsets[i++] = locale_charset;
946 outlen = (len * 2) + 16;
947 out = g_malloc (outlen + 1);
949 for (i = 0; charsets[i]; i++) {
950 if ((cd = camel_iconv_open ("UTF-8", charsets[i])) == (iconv_t) -1)
960 rc = iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
961 if (rc == (gsize) -1) {
962 if (errno == EINVAL) {
963 /* incomplete sequence at the end of the input buffer */
968 if (errno == E2BIG) {
969 outlen += (inleft * 2) + 16;
970 rc = (gsize) (outbuf - out);
971 out = g_realloc (out, outlen + 1);
972 outleft = outlen - rc;
980 } while (inleft > 0);
982 while ((rc = iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
987 rc = (gsize) (outbuf - out);
988 out = g_realloc (out, outlen + 1);
989 outleft = outlen - rc;
995 camel_iconv_close (cd);
997 if (rc != (gsize) -1 && n == 0)
1006 /* if we get here, then none of the charsets fit the 8bit text flawlessly...
1007 * try to find the one that fit the best and use that to convert what we can,
1008 * replacing any byte we can't convert with a '?' */
1010 if ((cd = camel_iconv_open ("UTF-8", best)) == (iconv_t) -1) {
1011 /* this shouldn't happen... but if we are here, then
1012 * it did... the only thing we can do at this point
1013 * is replace the 8bit garbage and pray */
1014 register const gchar *inptr = text;
1015 const gchar *inend = inptr + len;
1019 while (inptr < inend) {
1020 if (is_ascii (*inptr))
1021 *outbuf++ = *inptr++;
1037 rc = iconv (cd, (gchar **) &inbuf, &inleft, &outbuf, &outleft);
1038 if (rc == (gsize) -1) {
1039 if (errno == EINVAL) {
1040 /* incomplete sequence at the end of the input buffer */
1044 if (errno == E2BIG) {
1046 outlen += inleft * 2 + 16;
1047 out = g_realloc (out, outlen + 1);
1048 outleft = outlen - rc;
1057 } while (inleft > 0);
1059 while ((rc = iconv (cd, NULL, NULL, &outbuf, &outleft)) == (gsize) -1) {
1064 rc = (gsize) (outbuf - out);
1065 out = g_realloc (out, outlen + 1);
1066 outleft = outlen - rc;
1072 camel_iconv_close (cd);
/* Evaluates to non-zero when the @len bytes starting at @atom look like an
 * rfc2047 encoded-word token: at least 7 bytes long, beginning with "=?" and
 * ending with "?=".
 *
 * Every use of an argument is parenthesized so that callers may pass
 * arbitrary expressions (e.g. `n & 0xff` or `a ? b : c`) without operator
 * precedence changing the meaning.  Both arguments are still evaluated more
 * than once, so they must be free of side effects. */
#define is_rfc2047_encoded_word(atom, len) ((len) >= 7 && !strncmp ((atom), "=?", 2) && !strncmp ((atom) + (len) - 2, "?=", 2))
1079 /* decode an rfc2047 encoded-word token */
1081 rfc2047_decode_word (const gchar *in,
1083 const gchar *default_charset)
1085 const guchar *instart = (const guchar *) in;
1086 const guchar *inptr = instart + 2;
1087 const guchar *inend = instart + inlen - 2;
1089 const gchar *charset;
1098 /* skip over the charset */
1099 if (inlen < 8 || !(inptr = memchr (inptr, '?', inend - inptr)) || inptr[2] != '?')
1108 decoded = g_alloca (inend - inptr);
1109 declen = g_base64_decode_step ((gchar *) inptr, inend - inptr, decoded, &state, &save);
1114 decoded = g_alloca (inend - inptr);
1115 declen = quoted_decode (inptr, inend - inptr, decoded);
1118 d (fprintf (stderr, "encountered broken 'Q' encoding\n"));
1123 d (fprintf (stderr, "unknown encoding\n"));
1127 /* never return empty string, return rather NULL */
1131 len = (inptr - 3) - (instart + 2);
1132 charenc = g_alloca (len + 1);
1133 memcpy (charenc, in + 2, len);
1134 charenc[len] = '\0';
1137 /* rfc2231 updates rfc2047 encoded words...
1138 * The ABNF given in RFC 2047 for encoded-words is:
1139 * encoded-word := "=?" charset "?" encoding "?" encoded-text "?="
1140 * This specification changes this ABNF to:
1141 * encoded-word := "=?" charset ["*" language] "?" encoding "?" encoded-text "?="
1144 /* trim off the 'language' part if it's there... */
1145 if ((p = strchr (charset, '*')))
1148 /* slight optimization? */
1149 if (!g_ascii_strcasecmp (charset, "UTF-8")) {
1150 p = (gchar *) decoded;
1153 while (!g_utf8_validate (p, len, (const gchar **) &p)) {
1154 len = declen - (p - (gchar *) decoded);
1158 return g_strndup ((gchar *) decoded, declen);
1162 charset = camel_iconv_charset_name (charset);
1164 if (!charset[0] || (cd = camel_iconv_open ("UTF-8", charset)) == (iconv_t) -1) {
1166 "Cannot convert from %s to UTF-8, "
1167 "header display may be corrupt: %s",
1168 charset[0] ? charset : "unspecified charset",
1169 g_strerror (errno)));
1171 return decode_8bit ((gchar *) decoded, declen, default_charset);
1174 buf = camel_iconv_strndup (cd, (gchar *) decoded, declen);
1175 camel_iconv_close (cd);
1181 "Failed to convert \"%.*s\" to UTF-8, display may be "
1182 "corrupt: %s", declen, decoded, g_strerror (errno)));
1184 return decode_8bit ((gchar *) decoded, declen, charset);
1187 /* ok, a lot of mailers are BROKEN, and send iso-latin1 encoded
1188 * headers, when they should just be sticking to US-ASCII
1189 * according to the rfc's. Anyway, since the conversion to utf-8
1190 * is trivial, just do it here without iconv */
1192 append_latin1 (GString *out,
1202 out = g_string_append_c (out, 0xc0 | ((c >> 6) & 0x3)); /* 110000xx */
1203 out = g_string_append_c (out, 0x80 | (c & 0x3f)); /* 10xxxxxx */
1205 out = g_string_append_c (out, c);
1212 append_8bit (GString *out,
1215 const gchar *charset)
1217 gchar *outbase, *outbuf;
1221 ic = camel_iconv_open ("UTF-8", charset);
1222 if (ic == (iconv_t) -1)
1225 outlen = inlen * 6 + 16;
1226 outbuf = outbase = g_malloc (outlen);
1228 if (camel_iconv (ic, &inbuf, &inlen, &outbuf, &outlen) == (gsize) -1) {
1229 w (g_warning ("Conversion to '%s' failed: %s", charset, g_strerror (errno)));
1231 camel_iconv_close (ic);
1235 camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
1238 g_string_append (out, outbase);
1240 camel_iconv_close (ic);
1247 append_quoted_pair (GString *str,
1251 register const gchar *inptr = in;
1252 const gchar *inend = in + inlen;
1255 while (inptr < inend) {
1257 if (c == '\\' && inptr < inend)
1258 g_string_append_c (str, *inptr++);
1260 g_string_append_c (str, c);
1266 /* decodes a simple text, rfc822 + rfc2047 */
1268 header_decode_text (const gchar *in,
1270 const gchar *default_charset)
1272 register const gchar *inptr = in;
1273 gboolean encoded = FALSE;
1274 const gchar *lwsp, *text;
1281 return g_strdup ("");
1283 out = g_string_sized_new (strlen (in) + 1);
1285 while (*inptr != '\0') {
1287 while (camel_mime_is_lwsp (*inptr))
1290 nlwsp = (gsize) (inptr - lwsp);
1292 if (*inptr != '\0') {
1296 if (!strncmp (inptr, "=?", 2)) {
1299 /* skip past the charset (if one is even declared, sigh) */
1300 while (*inptr && *inptr != '?') {
1301 ascii = ascii && is_ascii (*inptr);
1305 /* sanity check encoding type */
1306 if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
1311 /* find the end of the rfc2047 encoded word token */
1312 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
1313 ascii = ascii && is_ascii (*inptr);
1317 if (!strncmp (inptr, "?=", 2))
1321 /* stop if we encounter a possible rfc2047 encoded
1322 * token even if it's inside another word, sigh. */
1323 while (*inptr && !camel_mime_is_lwsp (*inptr) &&
1324 strncmp (inptr, "=?", 2) != 0) {
1325 ascii = ascii && is_ascii (*inptr);
1330 n = (gsize) (inptr - text);
1331 if (is_rfc2047_encoded_word (text, n)) {
1332 if ((decoded = rfc2047_decode_word (text, n, default_charset))) {
1333 /* rfc2047 states that you must ignore all
1334 * whitespace between encoded words */
1336 g_string_append_len (out, lwsp, nlwsp);
1338 g_string_append (out, decoded);
1343 /* append lwsp and invalid rfc2047 encoded-word token */
1344 g_string_append_len (out, lwsp, nlwsp + n);
1349 g_string_append_len (out, lwsp, nlwsp);
1351 /* append word token */
1353 /* *sigh* I hate broken mailers... */
1354 decoded = decode_8bit (text, n, default_charset);
1355 n = strlen (decoded);
1362 g_string_append_len (out, text, n);
1364 append_quoted_pair (out, text, n);
1371 /* appending trailing lwsp */
1372 g_string_append_len (out, lwsp, nlwsp);
1378 g_string_free (out, FALSE);
1384 * camel_header_decode_string:
1385 * @in: input header value string
1386 * @default_charset: default charset to use if improperly encoded
1388 * Decodes rfc2047 encoded-word tokens
1390 * Returns: a string containing the UTF-8 version of the decoded header
1394 camel_header_decode_string (const gchar *in,
1395 const gchar *default_charset)
1400 return header_decode_text (in, FALSE, default_charset);
1404 * camel_header_format_ctext:
1405 * @in: input header value string
1406 * @default_charset: default charset to use if improperly encoded
1408 * Decodes a header which contains rfc2047 encoded-word tokens that
1409 * may or may not be within a comment.
1411 * Returns: a string containing the UTF-8 version of the decoded header
1415 camel_header_format_ctext (const gchar *in,
1416 const gchar *default_charset)
1421 return header_decode_text (in, TRUE, default_charset);
1424 /* Upper limit on the length of a run of pre-encoded words that we will try
1425 * to fit into a single properly folded word. Only a guide. */
1426 #define CAMEL_FOLD_PREENCODED (24)
1428 /* FIXME: needs a way to cache iconv opens for different charsets? */
1430 rfc2047_encode_word (GString *outstring,
1436 iconv_t ic = (iconv_t) -1;
1437 gchar *buffer, *out, *ascii;
1438 gsize inlen, outlen, enclen, bufflen;
1439 const gchar *inptr, *p;
1442 d (printf ("Converting [%d] '%.*s' to %s\n", len, len, in, type));
1444 /* convert utf8->encoding */
1445 bufflen = len * 6 + 16;
1446 buffer = g_alloca (bufflen);
1450 ascii = g_alloca (bufflen);
1452 if (g_ascii_strcasecmp (type, "UTF-8") != 0)
1453 ic = camel_iconv_open (type, "UTF-8");
1456 gssize convlen, proclen;
1459 /* break up words into smaller bits, what we really want is encoded + overhead < 75,
1460 * but we'll just guess what that means in terms of input chars, and assume it's good enough */
1465 if (ic == (iconv_t) -1) {
1466 /* native encoding case, the easy one (?) */
1467 /* we work out how much we can convert, and still be in length */
1468 /* proclen will be the result of input characters that we can convert, to the nearest
1469 * (approximated) valid utf8 char */
1474 while (p < (in + len) && convlen < (75 - strlen ("=?utf-8?q?\?="))) {
1482 if (camel_mime_special_table[c] & safemask)
1488 if (proclen >= 0 && proclen < i && convlen < (75 - strlen ("=?utf-8?q?\?=")))
1491 /* well, we probably have broken utf8, just copy it anyway what the heck */
1492 if (proclen == -1) {
1493 w (g_warning ("Appear to have truncated utf8 sequence"));
1497 memcpy (out, inptr, proclen);
1502 /* well we could do similar, but we can't (without undue effort), we'll just break it up into
1503 * hopefully-small-enough chunks, and leave it at that */
1504 convlen = MIN (inlen, CAMEL_FOLD_PREENCODED);
1506 if (camel_iconv (ic, &inptr, (gsize *) &convlen, &out, &outlen) == (gsize) -1 && errno != EINVAL) {
1507 w (g_warning ("Conversion problem: conversion truncated: %s", g_strerror (errno)));
1508 /* blah, we include it anyway, better than infinite loop ... */
1511 /* make sure we flush out any shift state */
1512 camel_iconv (ic, NULL, NULL, &out, &outlen);
1514 inlen -= (inptr - p);
1517 enclen = out - buffer;
1526 out += sprintf (out, "=?%s?Q?", type);
1527 out += quoted_encode ((guchar *) buffer, enclen, (guchar *) out, safemask);
1528 sprintf (out, "?=");
1530 d (printf ("converted part = %s\n", ascii));
1532 g_string_append (outstring, ascii);
1536 if (ic != (iconv_t) -1)
1537 camel_iconv_close (ic);
1541 header_encode_string_rfc2047 (const guchar *in,
1542 gboolean include_lwsp)
1544 const guchar *inptr = in, *start, *word;
1545 gboolean last_was_encoded = FALSE;
1546 gboolean last_was_space = FALSE;
1547 const gchar *charset;
1552 g_return_val_if_fail (g_utf8_validate ((const gchar *) in, -1, NULL), NULL);
1557 /* do a quick us-ascii check (the common case?) */
1564 return g_strdup ((gchar *) in);
1566 /* This gets each word out of the input, and checks to see what charset
1567 * can be used to encode it. */
1568 /* TODO: Work out when to merge subsequent words, or across word-parts */
1569 out = g_string_new ("");
1574 while (inptr && *inptr) {
1576 const gchar *newinptr;
1578 newinptr = g_utf8_next_char (inptr);
1579 c = g_utf8_get_char ((gchar *) inptr);
1580 if (newinptr == NULL || !g_unichar_validate (c)) {
1582 "Invalid UTF-8 sequence encountered "
1583 "(pos %d, gchar '%c'): %s",
1584 (inptr - in), inptr[0], in));
1589 if (c < 256 && !include_lwsp && camel_mime_is_lwsp (c) && !last_was_space) {
1590 /* we've reached the end of a 'word' */
1591 if (word && !(last_was_encoded && encoding)) {
1592 /* output lwsp between non-encoded words */
1593 g_string_append_len (out, (const gchar *) start, word - start);
1599 g_string_append_len (out, (const gchar *) start, inptr - start);
1600 last_was_encoded = FALSE;
1603 if (last_was_encoded)
1604 g_string_append_c (out, ' ');
1606 rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1607 last_was_encoded = TRUE;
1610 if (last_was_encoded)
1611 g_string_append_c (out, ' ');
1613 if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1615 rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1616 last_was_encoded = TRUE;
1620 last_was_space = TRUE;
1624 } else if (c > 127 && c < 256) {
1625 encoding = MAX (encoding, 1);
1626 last_was_space = FALSE;
1627 } else if (c >= 256) {
1628 encoding = MAX (encoding, 2);
1629 last_was_space = FALSE;
1630 } else if (include_lwsp || !camel_mime_is_lwsp (c)) {
1631 last_was_space = FALSE;
1634 if (!(c < 256 && !include_lwsp && camel_mime_is_lwsp (c)) && !word)
1637 inptr = (const guchar *) newinptr;
1640 if (inptr - start) {
1641 if (word && !(last_was_encoded && encoding)) {
1642 g_string_append_len (out, (const gchar *) start, word - start);
1648 g_string_append_len (out, (const gchar *) start, inptr - start);
1651 if (last_was_encoded)
1652 g_string_append_c (out, ' ');
1654 rfc2047_encode_word (out, (const gchar *) start, inptr - start, "ISO-8859-1", CAMEL_MIME_IS_ESAFE);
1657 if (last_was_encoded)
1658 g_string_append_c (out, ' ');
1660 if (!(charset = camel_charset_best ((const gchar *) start, inptr - start)))
1662 rfc2047_encode_word (out, (const gchar *) start, inptr - start, charset, CAMEL_MIME_IS_ESAFE);
1668 g_string_free (out, FALSE);
1673 /* TODO: Should this worry about quotes?? */
1675 * camel_header_encode_string:
1678 * Encodes a 'text' header according to the rules of rfc2047.
1680 * Returns: the rfc2047 encoded header
1683 camel_header_encode_string (const guchar *in)
1685 return header_encode_string_rfc2047 (in, FALSE);
1688 /* apply quoted-string rules to a string */
1690 quote_word (GString *out,
1697 /* TODO: What about folding on long lines? */
1699 g_string_append_c (out, '"');
1700 for (i = 0; i < len; i++) {
1702 if (c == '\"' || c == '\\' || c == '\r')
1703 g_string_append_c (out, '\\');
1704 g_string_append_c (out, c);
1707 g_string_append_c (out, '"');
1710 /* incrementing possibility for the word type */
1711 enum _phrase_word_t {
1717 struct _phrase_word {
1718 const guchar *start, *end;
1719 enum _phrase_word_t type;
1724 word_types_compatable (enum _phrase_word_t type1,
1725 enum _phrase_word_t type2)
1729 return type2 == WORD_QSTRING;
1731 return type2 != WORD_2047;
1733 return type2 == WORD_2047;
1739 /* split the input into words with info about each word
1740 * merge common word types clean up */
1742 header_encode_phrase_get_words (const guchar *in)
1744 const guchar *inptr = in, *start, *last;
1745 struct _phrase_word *word;
1746 enum _phrase_word_t type;
1747 gint encoding, count = 0;
1748 GList *words = NULL;
1750 /* break the input into words */
1755 while (inptr && *inptr) {
1757 const gchar *newinptr;
1759 newinptr = g_utf8_next_char (inptr);
1760 c = g_utf8_get_char ((gchar *) inptr);
1762 if (!g_unichar_validate (c)) {
1764 "Invalid UTF-8 sequence encountered "
1765 "(pos %d, gchar '%c'): %s",
1766 (inptr - in), inptr[0], in));
1771 inptr = (const guchar *) newinptr;
1772 if (g_unichar_isspace (c)) {
1774 word = g_new0 (struct _phrase_word, 1);
1775 word->start = start;
1778 word->encoding = encoding;
1779 words = g_list_append (words, word);
1789 if (!camel_mime_is_atom (c))
1790 type = MAX (type, WORD_QSTRING);
1791 } else if (c > 127 && c < 256) {
1793 encoding = MAX (encoding, 1);
1794 } else if (c >= 256) {
1796 encoding = MAX (encoding, 2);
1804 word = g_new0 (struct _phrase_word, 1);
1805 word->start = start;
1808 word->encoding = encoding;
1809 words = g_list_append (words, word);
/* True when a merged word of length wordlen is still short enough to keep as
 * one word: WORD_2047 words must stay below CAMEL_FOLD_PREENCODED, every
 * other word type below CAMEL_FOLD_SIZE - 8. */
1815 #define MERGED_WORD_LT_FOLDLEN(wordlen, type) ((type) == WORD_2047 ? (wordlen) < CAMEL_FOLD_PREENCODED : (wordlen) < (CAMEL_FOLD_SIZE - 8))
1818 header_encode_phrase_merge_words (GList **wordsp)
1820 GList *wordl, *nextl, *words = *wordsp;
1821 struct _phrase_word *word, *next;
1822 gboolean merged = FALSE;
1824 /* scan the list, checking for words of similar types that can be merged */
1828 nextl = g_list_next (wordl);
1832 /* merge nodes of the same type AND we are not creating too long a string */
1833 if (word_types_compatable (word->type, next->type)) {
1834 if (MERGED_WORD_LT_FOLDLEN (next->end - word->start, MAX (word->type, next->type))) {
1835 /* the resulting word type is the MAX of the 2 types */
1836 word->type = MAX (word->type, next->type);
1837 word->encoding = MAX (word->encoding, next->encoding);
1838 word->end = next->end;
1839 words = g_list_remove_link (words, nextl);
1840 g_list_free_1 (nextl);
1843 nextl = g_list_next (wordl);
1847 /* if it is going to be too long, make sure we include the
1848 * separating whitespace */
1849 word->end = next->start;
1857 wordl = g_list_next (wordl);
1865 /* encodes a phrase sequence (different quoting/encoding rules to strings) */
1867 * camel_header_encode_phrase:
1868 * @in: header to encode
1870 * Encodes a 'phrase' header according to the rules in rfc2047.
1872 * Returns: the encoded 'phrase'
1875 camel_header_encode_phrase (const guchar *in)
1877 struct _phrase_word *word = NULL, *last_word = NULL;
1878 GList *words, *wordl;
1879 const gchar *charset;
1886 words = header_encode_phrase_get_words (in);
1890 while (header_encode_phrase_merge_words (&words))
1893 out = g_string_new ("");
1895 /* output words now with spaces between them */
1903 /* append correct number of spaces between words */
1904 if (last_word && !(last_word->type == WORD_2047 && word->type == WORD_2047)) {
1905 /* one or both of the words are not encoded so we write the spaces out untouched */
1906 len = word->start - last_word->end;
1907 out = g_string_append_len (out, (gchar *) last_word->end, len);
1910 switch (word->type) {
1912 out = g_string_append_len (out, (gchar *) word->start, word->end - word->start);
1915 quote_word (out, TRUE, (gchar *) word->start, word->end - word->start);
1918 if (last_word && last_word->type == WORD_2047) {
1919 /* include the whitespace chars between these 2 words in the
1920 * resulting rfc2047 encoded word. */
1921 len = word->end - last_word->end;
1922 start = (const gchar *) last_word->end;
1924 /* encoded words need to be separated by linear whitespace */
1925 g_string_append_c (out, ' ');
1927 len = word->end - word->start;
1928 start = (const gchar *) word->start;
1931 if (word->encoding == 1) {
1932 rfc2047_encode_word (out, start, len, "ISO-8859-1", CAMEL_MIME_IS_PSAFE);
1934 if (!(charset = camel_charset_best (start, len)))
1936 rfc2047_encode_word (out, start, len, charset, CAMEL_MIME_IS_PSAFE);
1942 wordl = g_list_next (wordl);
1947 /* and we no longer need the list */
1949 g_list_free (words);
1952 g_string_free (out, FALSE);
1957 /* these are all internal parser functions */
1960 decode_token (const gchar **in)
1962 const gchar *inptr = *in;
1965 header_decode_lwsp (&inptr);
1967 while (camel_mime_is_ttoken (*inptr))
1969 if (inptr > start) {
1971 return g_strndup (start, inptr - start);
1978 * camel_header_token_decode:
1981 * Gets the first token in the string according to the rules of
1984 * Returns: a new string containing the first token in @in
1987 camel_header_token_decode (const gchar *in)
1992 return decode_token (&in);
1996 * <"> * ( <any char except <"> \, cr / \ <any char> ) <">
1999 header_decode_quoted_string (const gchar **in)
2001 const gchar *inptr = *in;
2002 gchar *out = NULL, *outptr;
2006 header_decode_lwsp (&inptr);
2007 if (*inptr == '"') {
2011 /* first, calc length */
2014 while ( (c = *intmp++) && c!= '"') {
2015 if (c == '\\' && *intmp) {
2020 outlen = intmp - inptr - skip;
2021 out = outptr = g_malloc (outlen + 1);
2022 while ( (c = *inptr) && c!= '"') {
2024 if (c == '\\' && *inptr) {
2038 header_decode_atom (const gchar **in)
2040 const gchar *inptr = *in, *start;
2042 header_decode_lwsp (&inptr);
2044 while (camel_mime_is_atom (*inptr))
2048 return g_strndup (start, inptr - start);
2054 extract_rfc2047_encoded_word (const gchar **in,
2057 const gchar *inptr = *in, *start;
2059 header_decode_lwsp (&inptr);
2062 if (!strncmp (inptr, "=?", 2)) {
2065 /* skip past the charset (if one is even declared, sigh) */
2066 while (*inptr && *inptr != '?') {
2070 /* sanity check encoding type */
2071 if (inptr[0] != '?' || !strchr ("BbQq", inptr[1]) || !inptr[1] || inptr[2] != '?')
2076 /* find the end of the rfc2047 encoded word token */
2077 while (*inptr && strncmp (inptr, "?=", 2) != 0) {
2081 if (!strncmp (inptr, "?=", 2)) {
2085 *word = g_strndup (start, inptr - start);
2095 header_decode_word (const gchar **in)
2097 const gchar *inptr = *in;
2100 header_decode_lwsp (&inptr);
2103 if (*inptr == '"') {
2104 return header_decode_quoted_string (in);
2105 } else if (*inptr == '=' && inptr[1] == '?' && extract_rfc2047_encoded_word (in, &word) && word) {
2108 return header_decode_atom (in);
2113 header_decode_value (const gchar **in)
2115 const gchar *inptr = *in;
2117 header_decode_lwsp (&inptr);
2118 if (*inptr == '"') {
2119 d (printf ("decoding quoted string\n"));
2120 return header_decode_quoted_string (in);
2121 } else if (camel_mime_is_ttoken (*inptr)) {
2122 d (printf ("decoding token\n"));
2123 /* this may not have the right specials for all params? */
2124 return decode_token (in);
2129 /* should this return -1 for no int? */
2132 * camel_header_decode_int:
2133 * @in: pointer to input string
2135 * Extracts an integer token from @in and updates the pointer to point
2136 * to after the end of the integer token (sort of like strtol).
2138 * Returns: the gint value
2141 camel_header_decode_int (const gchar **in)
2143 const gchar *inptr = *in;
2146 header_decode_lwsp (&inptr);
2147 while ( (c=*inptr++ & 0xff)
2149 v = v * 10 + (c - '0');
2155 #define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
2158 hex_decode (const gchar *in,
2161 const guchar *inend = (const guchar *) (in + len);
2162 guchar *inptr, *outptr;
2165 outbuf = (gchar *) g_malloc (len + 1);
2166 outptr = (guchar *) outbuf;
2168 inptr = (guchar *) in;
2169 while (inptr < inend) {
2170 if (*inptr == '%') {
2171 if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
2172 *outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
2175 *outptr++ = *inptr++;
2177 *outptr++ = *inptr++;
2185 /* Tries to convert @in from the @from charset to the @to charset. On any failure we return no data rather than a partial conversion */
2187 header_convert (const gchar *to,
2194 gchar *outbuf, *outbase, *result = NULL;
2196 ic = camel_iconv_open (to, from);
2197 if (ic == (iconv_t) -1)
2200 outlen = inlen * 6 + 16;
2201 outbuf = outbase = g_malloc (outlen);
2203 ret = camel_iconv (ic, &in, &inlen, &outbuf, &outlen);
2204 if (ret != (gsize) -1) {
2205 camel_iconv (ic, NULL, NULL, &outbuf, &outlen);
2207 result = g_strdup (outbase);
2209 camel_iconv_close (ic);
2215 /* an rfc2184 encoded string looks something like:
2216 * us-ascii'en'This%20is%20even%20more%20
2220 rfc2184_decode (const gchar *in,
2223 const gchar *inptr = in;
2224 const gchar *inend = in + len;
2225 const gchar *charset;
2226 gchar *decoded, *decword, *encoding;
2228 inptr = memchr (inptr, '\'', len);
2232 encoding = g_alloca (inptr - in + 1);
2233 memcpy (encoding, in, inptr - in);
2234 encoding[inptr - in] = 0;
2235 charset = camel_iconv_charset_name (encoding);
2237 inptr = memchr (inptr + 1, '\'', inend - inptr - 1);
2244 decword = hex_decode (inptr, inend - inptr);
2245 decoded = header_convert ("UTF-8", charset, decword, strlen (decword));
2252 * camel_header_param:
2253 * @params: parameters
2254 * @name: name of param to find
2256 * Searches @params for a param named @name and gets the value.
2258 * Returns: the value of the @name param
2261 camel_header_param (struct _camel_header_param *params,
2264 while (params && params->name &&
2265 g_ascii_strcasecmp (params->name, name) != 0)
2266 params = params->next;
2268 return params->value;
2274 * camel_header_set_param:
2275 * @paramsp: pointer to a list of params
2276 * @name: name of param to set
2277 * @value: value to set
2279 * Set a parameter in the list.
2281 * Returns: the set param
2283 struct _camel_header_param *
2284 camel_header_set_param (struct _camel_header_param **l,
2288 struct _camel_header_param *p = (struct _camel_header_param *) l, *pn;
2295 if (!g_ascii_strcasecmp (pn->name, name)) {
2298 pn->value = g_strdup (value);
2313 pn = g_malloc (sizeof (*pn));
2315 pn->name = g_strdup (name);
2316 pn->value = g_strdup (value);
2323 * camel_content_type_param:
2324 * @content_type: a #CamelContentType
2325 * @name: name of param to find
2327 * Searches the params on a #CamelContentType for a param named @name
2328 * and gets the value.
2330 * Returns: the value of the @name param
2333 camel_content_type_param (CamelContentType *t,
2338 return camel_header_param (t->params, name);
2342 * camel_content_type_set_param:
2343 * @content_type: a #CamelContentType
2344 * @name: name of param to set
2345 * @value: value of param to set
2347 * Set a parameter on @content_type.
2350 camel_content_type_set_param (CamelContentType *t,
2354 camel_header_set_param (&t->params, name, value);
2358 * camel_content_type_is:
2359 * @content_type: A content type specifier, or %NULL.
2360 * @type: A type to check against.
2361 * @subtype: A subtype to check against, or "*" to match any subtype.
2363 * The subtype of "*" will match any subtype. If @ct is %NULL, then
2364 * it will match the type "text/plain".
2366 * Returns: %TRUE if the content type @ct is of type @type/@subtype or
2370 camel_content_type_is (CamelContentType *ct,
2372 const gchar *subtype)
2374 /* no type == text/plain or text/"*" */
2375 if (ct == NULL || (ct->type == NULL && ct->subtype == NULL)) {
2376 return (!g_ascii_strcasecmp (type, "text")
2377 && (!g_ascii_strcasecmp (subtype, "plain")
2378 || !strcmp (subtype, "*")));
2381 return (ct->type != NULL
2382 && (!g_ascii_strcasecmp (ct->type, type)
2383 && ((ct->subtype != NULL
2384 && !g_ascii_strcasecmp (ct->subtype, subtype))
2385 || !strcmp ("*", subtype))));
2389 * camel_header_param_list_free:
2390 * @params: a list of params
2392 * Free the list of params.
2395 camel_header_param_list_free (struct _camel_header_param *p)
2397 struct _camel_header_param *n;
2409 * camel_content_type_new:
2410 * @type: the major type of the new content-type
2411 * @subtype: the subtype
2413 * Create a new #CamelContentType.
2415 * Returns: the new #CamelContentType
2418 camel_content_type_new (const gchar *type,
2419 const gchar *subtype)
2421 CamelContentType *t;
2423 t = g_slice_new (CamelContentType);
2424 t->type = g_strdup (type);
2425 t->subtype = g_strdup (subtype);
2433 * camel_content_type_ref:
2434 * @content_type: a #CamelContentType
2436 * Refs the content type.
2439 camel_content_type_ref (CamelContentType *ct)
2446 * camel_content_type_unref:
2447 * @content_type: a #CamelContentType
2449 * Unrefs, and potentially frees, the content type.
2452 camel_content_type_unref (CamelContentType *ct)
2455 if (ct->refcount <= 1) {
2456 camel_header_param_list_free (ct->params);
2458 g_free (ct->subtype);
2459 g_slice_free (CamelContentType, ct);
2467 /* for decoding email addresses, canonically */
2469 header_decode_domain (const gchar **in)
2471 const gchar *inptr = *in;
2474 GString *domain = g_string_new ("");
2476 /* domain ref | domain literal */
2477 header_decode_lwsp (&inptr);
2479 if (*inptr == '[') { /* domain literal */
2480 domain = g_string_append_c (domain, '[');
2482 header_decode_lwsp (&inptr);
2483 while (*inptr && camel_mime_is_dtext (*inptr)) {
2484 domain = g_string_append_c (domain, *inptr);
2487 if (*inptr == ']') {
2488 domain = g_string_append_c (domain, ']');
2491 w (g_warning ("closing ']' not found in domain: %s", *in));
2494 gchar *a = header_decode_atom (&inptr);
2496 domain = g_string_append (domain, a);
2499 w (g_warning ("missing atom from domain-ref"));
2503 header_decode_lwsp (&inptr);
2504 if (*inptr == '.') { /* next sub-domain? */
2505 domain = g_string_append_c (domain, '.');
2507 header_decode_lwsp (&inptr);
2515 g_string_free (domain, FALSE);
2520 header_decode_addrspec (const gchar **in)
2522 const gchar *inptr = *in;
2524 GString *addr = g_string_new ("");
2526 header_decode_lwsp (&inptr);
2529 word = header_decode_word (&inptr);
2531 addr = g_string_append (addr, word);
2532 header_decode_lwsp (&inptr);
2534 while (*inptr == '.' && word) {
2536 addr = g_string_append_c (addr, '.');
2537 word = header_decode_word (&inptr);
2539 addr = g_string_append (addr, word);
2540 header_decode_lwsp (&inptr);
2543 w (g_warning ("Invalid address spec: %s", *in));
2546 if (*inptr == '@') {
2548 addr = g_string_append_c (addr, '@');
2549 word = header_decode_domain (&inptr);
2551 addr = g_string_append (addr, word);
2554 w (g_warning ("Invalid address, missing domain: %s", *in));
2557 w (g_warning ("Invalid addr-spec, missing @: %s", *in));
2560 w (g_warning ("invalid addr-spec, no local part"));
2561 g_string_free (addr, TRUE);
2566 /* FIXME: return null on error? */
2570 g_string_free (addr, FALSE);
2576 * word *('.' word) @ domain |
2577 * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain |
2579 * 1 * word ':'[ word ... etc (mailbox, as above) ] ';'
2583 * word *( '.' word ) '@' domain
2584 * *(word) '<' [ *('@' domain ) ':' ] word *( '.' word) @ domain
2587 static struct _camel_header_address *
2588 header_decode_mailbox (const gchar **in,
2589 const gchar *charset)
2591 const gchar *inptr = *in;
2593 gint closeme = FALSE;
2595 GString *name = NULL;
2596 struct _camel_header_address *address = NULL;
2597 const gchar *comment = NULL;
2599 addr = g_string_new ("");
2601 /* for each address */
2602 pre = header_decode_word (&inptr);
2603 header_decode_lwsp (&inptr);
2604 if (!(*inptr == '.' || *inptr == '@' || *inptr == ',' || *inptr == '\0')) {
2605 /* ',' and '\0' required in case it is a simple address, no @ domain part (buggy writer) */
2606 name = g_string_new ("");
2610 /* perform internationalised decoding, and append */
2611 text = camel_header_decode_string (pre, charset);
2612 g_string_append (name, text);
2616 pre = header_decode_word (&inptr);
2618 gsize l = strlen (last);
2619 gsize p = strlen (pre);
2621 /* don't append ' ' between successive encoded words */
2622 if ((l > 6 && last[l - 2] == '?' && last[l - 1] == '=')
2623 && (p > 6 && pre[0] == '=' && pre[1] == '?')) {
2624 /* dont append ' ' */
2626 name = g_string_append_c (name, ' ');
2629 /* Fix for stupidly-broken-mailers that like to put unquoted '.' characters in names */
2631 while (!pre && *inptr && *inptr != '<') {
2632 w (g_warning ("Working around stupid mailer bug #5: unescaped characters in names"));
2633 name = g_string_append_c (name, *inptr++);
2634 pre = header_decode_word (&inptr);
2639 header_decode_lwsp (&inptr);
2640 if (*inptr == '<') {
2644 header_decode_lwsp (&inptr);
2645 if (*inptr == '@') {
2646 while (*inptr == '@') {
2648 header_decode_domain (&inptr);
2649 header_decode_lwsp (&inptr);
2650 if (*inptr == ',') {
2652 header_decode_lwsp (&inptr);
2655 if (*inptr == ':') {
2658 w (g_warning ("broken route-address, missing ':': %s", *in));
2661 pre = header_decode_word (&inptr);
2662 /*header_decode_lwsp(&inptr);*/
2664 w (g_warning ("broken address? %s", *in));
2669 addr = g_string_append (addr, pre);
2671 w (g_warning ("No local-part for email address: %s", *in));
2674 /* should be at word '.' localpart */
2675 while (*inptr == '.' && pre) {
2678 pre = header_decode_word (&inptr);
2679 addr = g_string_append_c (addr, '.');
2681 addr = g_string_append (addr, pre);
2683 header_decode_lwsp (&inptr);
2687 /* now at '@' domain part */
2688 if (*inptr == '@') {
2692 addr = g_string_append_c (addr, '@');
2694 dom = header_decode_domain (&inptr);
2695 addr = g_string_append (addr, dom);
2697 } else if (*inptr != '>' || !closeme) {
2698 /* If we get a <, the address was probably a name part, let's try again, shall we? */
2699 /* Another fix for seriously-broken-mailers */
2700 if (*inptr && *inptr != ',') {
2702 const gchar *name_part;
2705 w (g_warning ("We didn't get an '@' where we expected in '%s', trying again", *in));
2706 w (g_warning ("Name is '%s', Addr is '%s' we're at '%s'\n", name ? name->str:"<UNSET>", addr->str, inptr));
2708 /* need to keep *inptr, as try_address_again will drop the current character */
2712 g_string_append_c (addr, *inptr);
2716 while (*name_part && *name_part != ',') {
2717 if (*name_part == '\"')
2718 in_quote = !in_quote;
2719 else if (!in_quote && *name_part == '<')
2724 if (*name_part == '<' && ((!strchr (name_part, ',') && strchr (name_part, '>')) || (strchr (name_part, ',') > strchr (name_part, '>')))) {
2725 /* it's of a form "display-name <addr-spec>" */
2727 g_string_free (name, TRUE);
2729 g_string_free (addr, TRUE);
2731 if (name_part == *in)
2732 addr = g_string_new ("");
2734 addr = g_string_new_len (*in, name_part - *in - (camel_mime_is_lwsp (name_part[-1]) ? 1 : 0));
2737 /* check whether the address is an encoded word ... */
2738 text = camel_header_decode_string (addr->str, charset);
2741 addr = g_string_new ("");
2743 g_string_truncate (name, 0);
2744 g_string_append (name, text);
2747 g_string_append (name, text ? text : addr->str);
2748 g_string_truncate (addr, 0);
2752 /* or maybe that we've added up a bunch of broken bits to make an encoded word */
2753 if ((text = rfc2047_decode_word (name->str, name->len, charset))) {
2754 g_string_truncate (name, 0);
2755 g_string_append (name, text);
2759 goto try_address_again;
2761 w (g_warning ("invalid address, no '@' domain part at %c: %s", *inptr, *in));
2765 header_decode_lwsp (&inptr);
2766 if (*inptr == '>') {
2769 w (g_warning ("invalid route address, no closing '>': %s", *in));
2771 } else if (name == NULL && comment != NULL && inptr>comment) { /* check for comment after address */
2773 const gchar *comstart, *comend;
2775 /* this is a bit messy, we go from the last known position, because
2776 * decode_domain/etc skip over any comments on the way */
2777 /* FIXME: This won't detect comments inside the domain itself,
2778 * but nobody seems to use that feature anyway ... */
2780 d (printf ("checking for comment from '%s'\n", comment));
2782 comstart = strchr (comment, '(');
2785 header_decode_lwsp (&inptr);
2787 while (comend > comstart && comend[0] != ')')
2790 if (comend > comstart) {
2791 d (printf (" looking at subset '%.*s'\n", comend - comstart, comstart));
2792 tmp = g_strndup (comstart, comend - comstart);
2793 text = camel_header_decode_string (tmp, charset);
2794 name = g_string_new (text);
2803 if (addr->len > 0) {
2804 if (!g_utf8_validate (addr->str, addr->len, NULL)) {
2805 /* workaround for invalid addr-specs containing 8bit chars (see bug #42170 for details) */
2806 const gchar *locale_charset;
2809 locale_charset = camel_iconv_locale_charset ();
2811 out = g_string_new ("");
2813 if ((charset == NULL || !append_8bit (out, addr->str, addr->len, charset))
2814 && (locale_charset == NULL || !append_8bit (out, addr->str, addr->len, locale_charset)))
2815 append_latin1 (out, addr->str, addr->len);
2817 g_string_free (addr, TRUE);
2824 text = rfc2047_decode_word (addr->str, addr->len, charset);
2826 g_string_truncate (addr, 0);
2827 g_string_append (addr, text);
2833 address = camel_header_address_new_name (name ? name->str : "", addr->str);
2836 d (printf ("got mailbox: %s\n", addr->str));
2838 g_string_free (addr, TRUE);
2840 g_string_free (name, TRUE);
2845 static struct _camel_header_address *
2846 header_decode_address (const gchar **in,
2847 const gchar *charset)
2849 const gchar *inptr = *in;
2851 GString *group = g_string_new ("");
2852 struct _camel_header_address *addr = NULL, *member;
2854 /* pre-scan, trying to work out format, discard results */
2855 header_decode_lwsp (&inptr);
2856 while ((pre = header_decode_word (&inptr))) {
2857 group = g_string_append (group, pre);
2858 group = g_string_append (group, " ");
2861 header_decode_lwsp (&inptr);
2862 if (*inptr == ':') {
2863 d (printf ("group detected: %s\n", group->str));
2864 addr = camel_header_address_new_group (group->str);
2865 /* that was a group spec, scan mailboxes */
2867 /* FIXME: check rfc 2047 encodings of words, here or above in the loop */
2868 header_decode_lwsp (&inptr);
2869 if (*inptr != ';') {
2872 member = header_decode_mailbox (&inptr, charset);
2874 camel_header_address_add_member (addr, member);
2875 header_decode_lwsp (&inptr);
2881 if (*inptr == ';') {
2884 w (g_warning ("Invalid group spec, missing closing ';': %s", *in));
2891 addr = header_decode_mailbox (in, charset);
2894 g_string_free (group, TRUE);
2900 header_msgid_decode_internal (const gchar **in)
2902 const gchar *inptr = *in;
2903 gchar *msgid = NULL;
2905 d (printf ("decoding Message-ID: '%s'\n", *in));
2907 header_decode_lwsp (&inptr);
2908 if (*inptr == '<') {
2910 header_decode_lwsp (&inptr);
2911 msgid = header_decode_addrspec (&inptr);
2913 header_decode_lwsp (&inptr);
2914 if (*inptr == '>') {
2917 w (g_warning ("Missing closing '>' on message id: %s", *in));
2920 w (g_warning ("Cannot find message id in: %s", *in));
2923 w (g_warning ("missing opening '<' on message id: %s", *in));
2931 * camel_header_msgid_decode:
2934 * Extract a message-id token from @in.
2936 * Returns: the msg-id
2939 camel_header_msgid_decode (const gchar *in)
2944 return header_msgid_decode_internal (&in);
2948 * camel_header_contentid_decode:
2951 * Extract a content-id from @in.
2953 * Returns: the extracted content-id
2956 camel_header_contentid_decode (const gchar *in)
2958 const gchar *inptr = in;
2959 gboolean at = FALSE;
2963 d (printf ("decoding Content-ID: '%s'\n", in));
2965 header_decode_lwsp (&inptr);
2967 /* some lame mailers quote the Content-Id */
2971 /* make sure the content-id is not "" which can happen if we get a
2972 * content-id such as <.@> (which Eudora likes to use...) */
2973 if ((buf = camel_header_msgid_decode (inptr)) != NULL && *buf)
2978 /* ugh, not a valid msg-id - try to get something useful out of it then? */
2980 header_decode_lwsp (&inptr);
2981 if (*inptr == '<') {
2983 header_decode_lwsp (&inptr);
2986 /* Eudora has been known to use <.@> as a content-id */
2987 if (!(buf = header_decode_word (&inptr)) && !strchr (".@", *inptr))
2990 addr = g_string_new ("");
2991 header_decode_lwsp (&inptr);
2992 while (buf != NULL || *inptr == '.' || (*inptr == '@' && !at)) {
2994 g_string_append (addr, buf);
3000 if (*inptr == '.') {
3001 g_string_append_c (addr, *inptr++);
3002 buf = header_decode_word (&inptr);
3003 } else if (*inptr == '@') {
3004 g_string_append_c (addr, *inptr++);
3005 buf = header_decode_word (&inptr);
3008 } else if (strchr (".[]", *inptr)) {
3009 g_string_append_c (addr, *inptr++);
3010 buf = header_decode_atom (&inptr);
3013 header_decode_lwsp (&inptr);
3017 g_string_free (addr, FALSE);
3023 camel_header_references_list_append_asis (struct _camel_header_references **list,
3026 struct _camel_header_references *w = (struct _camel_header_references *) list, *n;
3029 n = g_malloc (sizeof (*n));
3036 camel_header_references_list_size (struct _camel_header_references **list)
3039 struct _camel_header_references *w = *list;
3048 camel_header_references_list_clear (struct _camel_header_references **list)
3050 struct _camel_header_references *w = *list, *n;
3061 header_references_decode_single (const gchar **in,
3062 struct _camel_header_references **head)
3064 struct _camel_header_references *ref;
3065 const gchar *inptr = *in;
3069 header_decode_lwsp (&inptr);
3070 if (*inptr == '<') {
3071 id = header_msgid_decode_internal (&inptr);
3073 ref = g_malloc (sizeof (struct _camel_header_references));
3080 word = header_decode_word (&inptr);
3083 else if (*inptr != '\0')
3084 inptr++; /* Stupid mailer tricks */
3091 /* TODO: why is this needed? Can't the other interface also work? */
3092 struct _camel_header_references *
3093 camel_header_references_inreplyto_decode (const gchar *in)
3095 struct _camel_header_references *ref = NULL;
3097 if (in == NULL || in[0] == '\0')
3100 header_references_decode_single (&in, &ref);
3105 /* generate a list of references, from most recent up */
3106 struct _camel_header_references *
3107 camel_header_references_decode (const gchar *in)
3109 struct _camel_header_references *refs = NULL;
3111 if (in == NULL || in[0] == '\0')
3115 header_references_decode_single (&in, &refs);
3120 struct _camel_header_references *
3121 camel_header_references_dup (const struct _camel_header_references *list)
3123 struct _camel_header_references *new = NULL, *tmp;
3126 tmp = g_new (struct _camel_header_references, 1);
3128 tmp->id = g_strdup (list->id);
3135 struct _camel_header_address *
3136 camel_header_mailbox_decode (const gchar *in,
3137 const gchar *charset)
3142 return header_decode_mailbox (&in, charset);
3145 struct _camel_header_address *
3146 camel_header_address_decode (const gchar *in,
3147 const gchar *charset)
3149 const gchar *inptr = in, *last;
3150 struct _camel_header_address *list = NULL, *addr;
3152 d (printf ("decoding To: '%s'\n", in));
3157 header_decode_lwsp (&inptr);
3163 addr = header_decode_address (&inptr, charset);
3165 camel_header_address_list_append (&list, addr);
3166 header_decode_lwsp (&inptr);
3171 } while (inptr != last);
3174 w (g_warning ("Invalid input detected at %c (%d): %s\n or at: %s", *inptr, inptr - in, in, inptr));
3177 if (inptr == last) {
3178 w (g_warning ("detected invalid input loop at : %s", last));
3184 struct _camel_header_newsgroup *
3185 camel_header_newsgroups_decode (const gchar *in)
3187 const gchar *inptr = in;
3189 struct _camel_header_newsgroup *head, *last, *ng;
3193 last = (struct _camel_header_newsgroup *) &head;
3196 header_decode_lwsp (&inptr);
3198 while ((c = *inptr++) && !camel_mime_is_lwsp (c) && c != ',')
3200 if (start != inptr - 1) {
3201 ng = g_malloc (sizeof (*ng));
3202 ng->newsgroup = g_strndup (start, inptr - start - 1);
3213 camel_header_newsgroups_free (struct _camel_header_newsgroup *ng)
3216 struct _camel_header_newsgroup *nng = ng->next;
3218 g_free (ng->newsgroup);
3224 /* this must be kept in sync with the header */
3225 static const gchar *encodings[] = {
3236 camel_transfer_encoding_to_string (CamelTransferEncoding encoding)
3238 if (encoding >= G_N_ELEMENTS (encodings))
3241 return encodings[encoding];
3244 CamelTransferEncoding
3245 camel_transfer_encoding_from_string (const gchar *string)
3249 if (string != NULL) {
3250 for (i = 0; i < G_N_ELEMENTS (encodings); i++)
3251 if (!g_ascii_strcasecmp (string, encodings[i]))
3255 return CAMEL_TRANSFER_ENCODING_DEFAULT;
3259 camel_header_mime_decode (const gchar *in,
3263 const gchar *inptr = in;
3264 gint major=-1, minor=-1;
3266 d (printf ("decoding MIME-Version: '%s'\n", in));
3269 header_decode_lwsp (&inptr);
3270 if (isdigit (*inptr)) {
3271 major = camel_header_decode_int (&inptr);
3272 header_decode_lwsp (&inptr);
3273 if (*inptr == '.') {
3275 header_decode_lwsp (&inptr);
3276 if (isdigit (*inptr))
3277 minor = camel_header_decode_int (&inptr);
3287 d (printf ("major = %d, minor = %d\n", major, minor));
3290 struct _rfc2184_param {
3291 struct _camel_header_param param;
3296 rfc2184_param_cmp (gconstpointer ap,
3299 const struct _rfc2184_param *a = *(gpointer *) ap;
3300 const struct _rfc2184_param *b = *(gpointer *) bp;
3303 res = strcmp (a->param.name, b->param.name);
3305 if (a->index > b->index)
3307 else if (a->index < b->index)
3314 /* NB: Steals name and value */
3315 static struct _camel_header_param *
3316 header_append_param (struct _camel_header_param *last,
3320 struct _camel_header_param *node;
3323 * 8 bit data in parameters, illegal, tries to convert using locale, or just safens it up.
3324 * rfc2047 encoded parameters, illegal, decodes them anyway. Some Outlook & Mozilla do this?
3326 node = g_malloc (sizeof (*node));
3330 if (strncmp (value, "=?", 2) == 0
3331 && (node->value = header_decode_text (value, FALSE, NULL))) {
3333 } else if (g_ascii_strcasecmp (name, "boundary") != 0 && !g_utf8_validate (value, -1, NULL)) {
3334 const gchar *charset = camel_iconv_locale_charset ();
3336 if ((node->value = header_convert ("UTF-8", charset ? charset:"ISO-8859-1", value, strlen (value)))) {
3339 node->value = value;
3340 for (;*value; value++)
3341 if (!isascii ((guchar) * value))
3345 node->value = value;
3350 static struct _camel_header_param *
3351 header_decode_param_list (const gchar **in)
3353 struct _camel_header_param *head = NULL, *last = (struct _camel_header_param *) &head;
3354 GPtrArray *split = NULL;
3355 const gchar *inptr = *in;
3356 struct _rfc2184_param *work;
3359 /* Dump parameters into the output list, in the order found. RFC 2184 split parameters are kept in an array */
3360 header_decode_lwsp (&inptr);
3361 while (*inptr == ';') {
3363 gchar *value = NULL;
3366 name = decode_token (&inptr);
3367 header_decode_lwsp (&inptr);
3368 if (*inptr == '=') {
3370 value = header_decode_value (&inptr);
3373 if (name && value) {
3374 gchar *index = strchr (name, '*');
3377 if (index[1] == 0) {
3378 /* VAL*="foo", decode immediately and append */
3380 tmp = rfc2184_decode (value, strlen (value));
3385 last = header_append_param (last, name, value);
3387 /* VAL*1="foo", save for later */
3389 work = g_malloc (sizeof (*work));
3390 work->param.name = name;
3391 work->param.value = value;
3392 work->index = atoi (index);
3394 split = g_ptr_array_new ();
3395 g_ptr_array_add (split, work);
3398 last = header_append_param (last, name, value);
3405 header_decode_lwsp (&inptr);
3408 /* Rejoin any RFC 2184 split parameters in the proper order */
3409 /* Parameters with the same index will be concatenated in undefined order */
3411 GString *value = g_string_new ("");
3412 struct _rfc2184_param *first;
3415 qsort (split->pdata, split->len, sizeof (split->pdata[0]), rfc2184_param_cmp);
3416 first = split->pdata[0];
3417 for (i = 0; i < split->len; i++) {
3418 work = split->pdata[i];
3419 if (split->len - 1 == i)
3420 g_string_append (value, work->param.value);
3421 if (split->len - 1 == i || strcmp (work->param.name, first->param.name) != 0) {
3422 tmp = rfc2184_decode (value->str, value->len);
3424 tmp = g_strdup (value->str);
3426 last = header_append_param (last, g_strdup (first->param.name), tmp);
3427 g_string_truncate (value, 0);
3430 if (split->len - 1 != i)
3431 g_string_append (value, work->param.value);
3433 g_string_free (value, TRUE);
3434 for (i = 0; i < split->len; i++) {
3435 work = split->pdata[i];
3436 g_free (work->param.name);
3437 g_free (work->param.value);
3440 g_ptr_array_free (split, TRUE);
3448 struct _camel_header_param *
3449 camel_header_param_list_decode (const gchar *in)
3454 return header_decode_param_list (&in);
3458 header_encode_param (const guchar *in,
3460 gboolean is_filename)
3462 const guchar *inptr = in;
3463 guchar *outbuf = NULL;
3464 const gchar *charset;
3471 g_return_val_if_fail (in != NULL, NULL);
3474 if (!g_utf8_validate ((gchar *) inptr, -1, NULL)) {
3475 GString *buff = g_string_new ("");
3477 for (; inptr && *inptr; inptr++) {
3479 g_string_append_printf (buff, "%%%02X", (*inptr) & 0xFF);
3481 g_string_append_c (buff, *inptr);
3484 outbuf = (guchar *) g_string_free (buff, FALSE);
3488 /* do not set encoded flag for file names */
3489 str = header_encode_string_rfc2047 (inptr, TRUE);
3495 /* if we have really broken utf8 passed in, we just treat it as binary data */
3497 charset = camel_charset_best ((gchar *) in, strlen ((gchar *) in));
3498 if (charset == NULL) {
3499 return g_strdup ((gchar *) in);
3502 if (g_ascii_strcasecmp (charset, "UTF-8") != 0) {
3503 if ((outbuf = (guchar *) header_convert (charset, "UTF-8", (const gchar *) in, strlen ((gchar *) in))))
3506 return g_strdup ((gchar *) in);
3509 /* FIXME: set the 'language' as well, assuming we can get that info...? */
3510 out = g_string_new (charset);
3511 g_string_append (out, "''");
3513 while ((c = *inptr++)) {
3514 if (camel_mime_is_attrchar (c))
3515 g_string_append_c (out, c);
3517 g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
3522 g_string_free (out, FALSE);
3528 /* HACK: Set to non-zero when you want the 'filename' and 'name' headers encoded in RFC 2047 way,
3529 * otherwise they will be encoded in the correct RFC 2231 way. It's because Outlook and GMail
3530 * do not understand the correct standard and refuse attachments with localized name sent
3531 * from evolution. This seems to have been fixed in Exchange 2007 at least - not sure about
3532 * standalone Outlook. */
3533 gint camel_header_param_encode_filenames_in_rfc_2047 = 0;
/* Appends MIME parameters to @out as "; name=value" items.
 *
 * Each value is passed through header_encode_param().  When
 * camel_header_param_encode_filenames_in_rfc_2047 is non-zero, the
 * "filename" and "name" parameters are flagged as file names: they are
 * always quoted and never folded.  Any other parameter whose name plus
 * value exceeds CAMEL_FOLD_SIZE - 8 is split into numbered rfc2184
 * continuation pieces.
 *
 * NOTE(review): @p is presumably walked as a linked list; the loop over
 * the list is not visible here - confirm. */
3536 camel_header_param_list_format_append (GString *out,
3537 struct _camel_header_param *p)
3539 gint used = out->len;
3542 gboolean is_filename = camel_header_param_encode_filenames_in_rfc_2047 && (g_ascii_strcasecmp (p->name, "filename") == 0 || g_ascii_strcasecmp (p->name, "name") == 0);
3543 gboolean encoded = FALSE;
3544 gboolean quote = FALSE;
3545 gint here = out->len;
3554 value = header_encode_param ((guchar *) p->value, &encoded, is_filename);
3556 w (g_warning ("appending parameter %s=%s violates rfc2184", p->name, p->value));
3557 value = g_strdup (p->value);
/* scan the value for tspecial or whitespace characters */
3563 for (ch = value; ch && *ch; ch++) {
3564 if (camel_mime_is_tspecial (*ch) || camel_mime_is_lwsp (*ch))
/* file names are always quoted */
3571 quote = quote || is_filename;
3572 nlen = strlen (p->name);
3573 vlen = strlen (value);
3575 /* do not fold file names */
3576 if (!is_filename && used + nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3577 out = g_string_append (out, ";\n\t");
3581 out = g_string_append (out, "; ");
3583 if (!is_filename && nlen + vlen > CAMEL_FOLD_SIZE - 8) {
3584 /* we need to do special rfc2184 parameter wrapping */
3585 gint maxlen = CAMEL_FOLD_SIZE - (nlen + 8);
3586 gchar *inptr, *inend;
3590 inend = value + vlen;
3592 while (inptr < inend) {
/* tentative end of this piece: at most maxlen bytes further on */
3593 gchar *ptr = inptr + MIN (inend - inptr, maxlen);
3595 if (encoded && ptr < inend) {
3596 /* be careful not to break an encoded gchar (ie %20) */
3600 for (; j > 0 && q > inptr && *q != '%'; j--, q--);
3606 g_string_append (out, ";\n\t");
/* piece header: name, '*', piece index, and a second '*' when the value is encoded */
3611 g_string_append_printf (out, "%s*%d%s=", p->name, i++, encoded ? "*" : "");
3612 if (encoded || !quote)
3613 g_string_append_len (out, inptr, ptr - inptr);
3615 quote_word (out, TRUE, inptr, ptr - inptr);
3617 d (printf ("wrote: %s\n", out->str + here));
3619 used += (out->len - here);
3624 g_string_append_printf (out, "%s%s=", p->name, encoded ? "*" : "");
3626 /* Quote even if we don't need to in order to
3627 * work around broken mail software like the
3628 * Jive Forums' NNTP gateway */
3629 if (encoded /*|| !quote */)
3630 g_string_append (out, value);
3632 quote_word (out, TRUE, value, vlen);
3634 used += (out->len - here);
/* Formats the parameter list @p into a new, initially empty GString by
 * calling camel_header_param_list_format_append().  The GString wrapper
 * is released with free_segment == FALSE, so the character data survives.
 * NOTE(review): that data is presumably what gets returned; the return
 * statement is not visible here. */
3644 camel_header_param_list_format (struct _camel_header_param *p)
3646 GString *out = g_string_new ("");
3649 camel_header_param_list_format_append (out, p);
3651 g_string_free (out, FALSE);
/* Parses the Content-Type header value @in.
 *
 * Reads a type token, then, if a '/' follows, a subtype token.  A "text"
 * type with no subtype falls back to the subtype "plain".  The result is
 * built with camel_content_type_new() and its parameter list is decoded
 * from the remainder of the input.
 *
 * NOTE(review): the handling of a missing type token is only partly
 * visible here (see the "cannot find MIME type" debug message). */
3656 camel_content_type_decode (const gchar *in)
3658 const gchar *inptr = in;
3659 gchar *type, *subtype = NULL;
3660 CamelContentType *t = NULL;
3665 type = decode_token (&inptr);
3666 header_decode_lwsp (&inptr);
3668 if (*inptr == '/') {
3670 subtype = decode_token (&inptr);
3672 if (subtype == NULL && (!g_ascii_strcasecmp (type, "text"))) {
3673 w (g_warning ("text type with no subtype, resorting to text/plain: %s", in));
3674 subtype = g_strdup ("plain");
3676 if (subtype == NULL) {
3677 w (g_warning ("MIME type with no subtype: %s", in));
3680 t = camel_content_type_new (type, subtype);
3681 t->params = header_decode_param_list (&inptr);
3686 d (printf ("cannot find MIME type in header (2) '%s'", in));
/* Debugging aid: prints @ct to stdout as "Content-Type: type / subtype"
 * followed by one name="value" line per parameter.
 * NOTE(review): "<NULL>" is presumably printed when @ct is NULL; the
 * test itself is not visible here. */
3692 camel_content_type_dump (CamelContentType *ct)
3694 struct _camel_header_param *p;
3696 printf ("Content-Type: ");
3698 printf ("<NULL>\n");
3701 printf ("%s / %s", ct->type, ct->subtype);
3705 printf (";\n\t%s=\"%s\"", p->name, p->value);
/* Builds the textual form of @ct, including its parameters.
 *
 * - no type:     "text/plain"
 * - no subtype:  "<type>/mixed" for multipart; the bare type is also
 *                appended (presumably in an else branch that is not
 *                visible here)
 * - otherwise:   "<type>/<subtype>"
 *
 * The parameter list is then appended with
 * camel_header_param_list_format_append(). */
3713 camel_content_type_format (CamelContentType *ct)
3721 out = g_string_new ("");
3722 if (ct->type == NULL) {
3723 g_string_append_printf (out, "text/plain");
3724 w (g_warning ("Content-Type with no main type"));
3725 } else if (ct->subtype == NULL) {
3726 w (g_warning ("Content-Type with no sub type: %s", ct->type));
3727 if (!g_ascii_strcasecmp (ct->type, "multipart"))
3728 g_string_append_printf (out, "%s/mixed", ct->type);
3730 g_string_append_printf (out, "%s", ct->type);
3732 g_string_append_printf (out, "%s/%s", ct->type, ct->subtype);
3734 camel_header_param_list_format_append (out, ct->params);
3737 g_string_free (out, FALSE);
/* Returns a newly allocated "type/subtype" string for @ct, without any
 * parameters.  A missing type yields "text/plain"; a missing subtype
 * yields "<type>/mixed" for multipart and a copy of the bare type in the
 * other visible return. */
3743 camel_content_type_simple (CamelContentType *ct)
3745 if (ct->type == NULL) {
3746 w (g_warning ("Content-Type with no main type"));
3747 return g_strdup ("text/plain");
3748 } else if (ct->subtype == NULL) {
3749 w (g_warning ("Content-Type with no sub type: %s", ct->type));
3750 if (!g_ascii_strcasecmp (ct->type, "multipart"))
3751 return g_strdup_printf ("%s/mixed", ct->type);
3753 return g_strdup (ct->type);
3755 return g_strdup_printf ("%s/%s", ct->type, ct->subtype);
/* Returns the leading token of the Content-Transfer-Encoding value @in,
 * as produced by decode_token(). */
3759 camel_content_transfer_encoding_decode (const gchar *in)
3762 return decode_token (&in);
/* Parses the Content-Disposition header value @in into a newly allocated
 * CamelContentDisposition: the first token becomes the disposition type
 * and the remainder is decoded as a parameter list.  An empty disposition
 * type only triggers a warning. */
3767 CamelContentDisposition *
3768 camel_content_disposition_decode (const gchar *in)
3770 CamelContentDisposition *d = NULL;
3771 const gchar *inptr = in;
3776 d = g_malloc (sizeof (*d));
3778 d->disposition = decode_token (&inptr);
3779 if (d->disposition == NULL) {
3780 w (g_warning ("Empty disposition type"));
3782 d->params = header_decode_param_list (&inptr);
/* NOTE(review): presumably takes a reference on @d (the matching unref
 * function tests d->refcount); the body is not visible here - confirm. */
3787 camel_content_disposition_ref (CamelContentDisposition *d)
/* Drops a reference on @d.  When the reference count is 1 or less, the
 * parameter list and the disposition string are freed. */
3794 camel_content_disposition_unref (CamelContentDisposition *d)
3797 if (d->refcount <= 1) {
3798 camel_header_param_list_free (d->params);
3799 g_free (d->disposition);
/* Builds the textual form of the disposition @d: the disposition type
 * followed by its parameters.
 * NOTE(review): "attachment" is presumably the fallback when
 * d->disposition is unset; the condition is not visible here. */
3808 camel_content_disposition_format (CamelContentDisposition *d)
3816 out = g_string_new ("");
3818 out = g_string_append (out, d->disposition);
3820 out = g_string_append (out, "attachment");
3821 camel_header_param_list_format_append (out, d->params);
3824 g_string_free (out, FALSE);
3828 /* date parser macros */
/* Character classes for date tokens.
 * NOTE(review): presumably the sets from which camel_datetok_table was
 * derived; no direct use is visible here. */
3829 #define NUMERIC_CHARS "1234567890"
3830 #define WEEKDAY_CHARS "SundayMondayTuesdayWednesdayThursdayFridaySaturday"
3831 #define MONTH_CHARS "JanuaryFebruaryMarchAprilMayJuneJulyAugustSeptemberOctoberNovemberDecember"
3832 #define TIMEZONE_ALPHA_CHARS "UTCGMTESTEDTCSTCDTMSTPSTPDTZAMNY()"
3833 #define TIMEZONE_NUMERIC_CHARS "-+1234567890"
3834 #define TIME_CHARS "1234567890:"
/* Token mask bits.  A NON_* bit is set once any character of the token
 * falls outside that class (the is_*() macros test for the bit being
 * clear); the HAS_* bits record that a ':' or a sign was seen. */
3836 #define DATE_TOKEN_NON_NUMERIC (1 << 0)
3837 #define DATE_TOKEN_NON_WEEKDAY (1 << 1)
3838 #define DATE_TOKEN_NON_MONTH (1 << 2)
3839 #define DATE_TOKEN_NON_TIME (1 << 3)
3840 #define DATE_TOKEN_HAS_COLON (1 << 4)
3841 #define DATE_TOKEN_NON_TIMEZONE_ALPHA (1 << 5)
3842 #define DATE_TOKEN_NON_TIMEZONE_NUMERIC (1 << 6)
3843 #define DATE_TOKEN_HAS_SIGN (1 << 7)
/* Per-byte DATE_TOKEN_* bit masks, indexed by the unsigned character
 * value; datetok() ORs together the entries of every character in a token.
 * Examples:
 *   111 = every NON_* bit set: the character belongs to no class
 *    38 = digits: NON_WEEKDAY | NON_MONTH | NON_TIMEZONE_ALPHA
 *   119 = ':': as 111 but with HAS_COLON set and NON_TIME clear
 *   175 = '+' and '-': HAS_SIGN set and NON_TIMEZONE_NUMERIC clear
 *    79 = '(' and ')': NON_TIMEZONE_ALPHA clear */
3845 static guchar camel_datetok_table[256] = {
3846 128,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3847 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3848 111,111,111,111,111,111,111,111, 79, 79,111,175,111,175,111,111,
3849 38, 38, 38, 38, 38, 38, 38, 38, 38, 38,119,111,111,111,111,111,
3850 111, 75,111, 79, 75, 79,105, 79,111,111,107,111,111, 73, 75,107,
3851 79,111,111, 73, 77, 79,111,109,111, 79, 79,111,111,111,111,111,
3852 111,105,107,107,109,105,111,107,105,105,111,111,107,107,105,105,
3853 107,111,105,105,105,105,107,111,111,105,111,111,111,111,111,111,
3854 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3855 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3856 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3857 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3858 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3859 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3860 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
3861 111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,111,
/* NOTE(review): presumably an entry of the tz_offsets name/offset table
 * consulted by get_tzone(); the offset looks like a signed hhmm value
 * (-500 for EST) - confirm against the full table. */
3870 { "EST", -500 }, /* these are all US timezones. bloody yanks */
/* Three-letter English month abbreviations, indexed 0 (Jan) to 11 (Dec);
 * each entry holds three characters plus the terminating NUL. */
3885 static const gchar tm_months[][4] = {
3886 "Jan", "Feb", "Mar", "Apr", "May", "Jun",
3887 "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
/* Three-letter English weekday abbreviations, indexed 0 (Sun) to 6 (Sat);
 * each entry holds three characters plus the terminating NUL. */
3890 static const gchar tm_days[][4] = {
3891 "Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
3895 * camel_header_format_date:
3896 * @date: time_t date representation
3897 * @tz_offset: Timezone offset
3899 * Allocates a string buffer containing the rfc822 formatted date
3900 * string represented by @date and @tz_offset.
3902 * Returns: a valid string representation of the date.
/* Formats @date as "Day, DD Mon YYYY HH:MM:SS +ZZZZ".
 *
 * @tz_offset is treated as a signed hhmm number: the hundreds are hours
 * and the remainder is minutes.  The offset is added to @date first, so
 * that breaking the shifted value down with gmtime_r() gives the
 * wall-clock fields for that zone. */
3905 camel_header_format_date (time_t date,
3910 d (printf ("offset = %d\n", tz_offset));
3912 d (printf ("converting date %s", ctime (&date)));
3914 date += ((tz_offset / 100) * (60 * 60)) + (tz_offset % 100) * 60;
3916 d (printf ("converting date %s", ctime (&date)));
3918 gmtime_r (&date, &tm);
3920 return g_strdup_printf (
3921 "%s, %02d %s %04d %02d:%02d:%02d %+05d",
3922 tm_days[tm.tm_wday],
3924 tm_months[tm.tm_mon],
3932 /* This is where it gets ugly... */
/* One token of a date string, kept in a singly linked list.  Code that
 * uses it also reads the fields start, len and mask; their declarations
 * are not visible here. */
3934 struct _date_token {
3935 struct _date_token *next;
/* Splits @date into a linked list of tokens.
 *
 * Leading spaces and tabs are skipped.  A token runs until one of the
 * delimiters '-', '/', ',', tab, CR, LF or space, or until the end of the
 * string.  The token mask is the OR of the camel_datetok_table entries of
 * its characters.  Tokens point into @date; the text is not copied. */
3941 static struct _date_token *
3942 datetok (const gchar *date)
/* NOTE(review): treating &tokens as a node presumably relies on "next" being the first member of struct _date_token - confirm */
3944 struct _date_token *tokens = NULL, *token, *tail = (struct _date_token *) &tokens;
3945 const gchar *start, *end;
3950 /* kill leading whitespace */
3951 while (*start == ' ' || *start == '\t')
3957 mask = camel_datetok_table[(guchar) *start];
3959 /* find the end of this token */
3961 while (*end && !strchr ("-/,\t\r\n ", *end))
3962 mask |= camel_datetok_table[(guchar) *end++];
3965 token = g_malloc (sizeof (struct _date_token));
3967 token->start = start;
3968 token->len = end - start;
/* Parses a decimal integer with an optional leading '+' or '-' from the
 * start of @in, accumulating the digits into val.
 * NOTE(review): a non-digit character presumably makes the function fail
 * with -1 (callers compare the result against -1); the failure path and
 * the use of sign are not visible here.  No overflow check is visible. */
3985 decode_int (const gchar *in,
3988 register const gchar *inptr;
3989 gint sign = 1, val = 0;
3995 if (*inptr == '-') {
3998 } else if (*inptr == '+')
4001 for (; inptr < inend; inptr++) {
4002 if (!(*inptr >= '0' && *inptr <= '9'))
4005 val = (val * 10) + (*inptr - '0');
/* NOTE(review): presumably returns the number of days in @month for the
 * given year, with g_date_is_leap_year() deciding the leap-year case;
 * most of the body is not visible here. */
4015 get_days_in_month (gint month,
4033 if (g_date_is_leap_year (year))
/* Maps a weekday name to its index in tm_days by comparing the first
 * three characters of @in case-insensitively.  Returns -1 when @in is
 * NULL or no weekday matches. */
4044 get_wday (const gchar *in,
4049 g_return_val_if_fail (in != NULL, -1);
4054 for (wday = 0; wday < 7; wday++) {
4055 if (!g_ascii_strncasecmp (in, tm_days[wday], 3))
4059 return -1; /* unknown week day */
/* Decodes a day of the month from @in with decode_int() and range-checks
 * it against 0..31.  Returns -1 when @in is NULL.
 * NOTE(review): the range check admits 0; the out-of-range result is not
 * visible here (presumably -1). */
4063 get_mday (const gchar *in,
4068 g_return_val_if_fail (in != NULL, -1);
4070 mday = decode_int (in, inlen);
4072 if (mday < 0 || mday > 31)
/* Maps a month name to its index in tm_months by comparing the first
 * three characters of @in case-insensitively.  Returns -1 when @in is
 * NULL or no month matches. */
4079 get_month (const gchar *in,
4084 g_return_val_if_fail (in != NULL, -1);
4089 for (i = 0; i < 12; i++) {
4090 if (!g_ascii_strncasecmp (in, tm_months[i], 3))
4094 return -1; /* unknown month */
/* Decodes a year from @in with decode_int().  Returns -1 when @in is NULL.
 * Short years are expanded with a pivot of 70: values below 70 get 2000
 * added, the others get 1900.
 * NOTE(review): the condition guarding this expansion is not visible here. */
4098 get_year (const gchar *in,
4103 g_return_val_if_fail (in != NULL, -1);
4105 if ((year = decode_int (in, inlen)) == -1)
4109 year += (year < 70) ? 2000 : 1900;
/* Parses a colon-separated time from @in into *hour, *min and *sec, all
 * of which are zeroed first.  Digits accumulate into the field that val
 * currently points at.
 * NOTE(review): ':' presumably advances val to the next field, and any
 * other non-digit character makes the parse fail; those statements are
 * not visible here. */
4118 get_time (const gchar *in,
4124 register const gchar *inptr;
4125 gint *val, colons = 0;
4128 *hour = *min = *sec = 0;
4132 for (inptr = in; inptr < inend; inptr++) {
4133 if (*inptr == ':') {
4145 } else if (!(*inptr >= '0' && *inptr <= '9'))
4148 *val = (*val * 10) + (*inptr - '0');
/* Extracts a timezone offset from the token list, examining at most two
 * tokens and advancing *token as it goes.
 *
 * A token that starts with '+' or '-' is decoded numerically.  Otherwise
 * the token is compared against the names in tz_offsets and the offset of
 * the first match is returned; tokens wrapped in parentheses get special
 * handling that is only partly visible here.
 *
 * NOTE(review): the bound 15 is presumably the number of tz_offsets
 * entries - confirm against the table. */
4155 get_tzone (struct _date_token **token)
4157 const gchar *inptr, *inend;
4161 for (i = 0; *token && i < 2; *token = (*token)->next, i++) {
4162 inptr = (*token)->start;
4163 inlen = (*token)->len;
4164 inend = inptr + inlen;
4166 if (*inptr == '+' || *inptr == '-') {
4167 return decode_int (inptr, inlen);
4169 if (*inptr == '(') {
4171 if (*(inend - 1) == ')')
4177 for (t = 0; t < 15; t++) {
4178 gsize len = strlen (tz_offsets[t].name);
4183 if (!strncmp (inptr, tz_offsets[t].name, len))
4184 return tz_offsets[t].offset;
/* Strict parser for the token sequence
 *   [weekday] day month year time [timezone]
 *
 * The weekday is optional.  A missing or invalid day, month, year or
 * time token is treated as a failure (the failure statements are not
 * visible here; the caller treats a zero result as failure and falls
 * back to parse_broken_date()).  An AM/PM token after the time is not
 * accepted as a valid RFC 822 time.  The broken-down time is converted
 * with camel_mktime_utc() and then shifted by the hhmm-style timezone
 * offset to obtain the GMT value. */
4193 parse_rfc822_date (struct _date_token *tokens,
4196 gint hour, min, sec, offset, n;
4197 struct _date_token *token;
4201 g_return_val_if_fail (tokens != NULL, (time_t) 0);
4205 memset ((gpointer) &tm, 0, sizeof (struct tm));
4207 if ((n = get_wday (token->start, token->len)) != -1) {
4208 /* not all dates may have this... */
4210 token = token->next;
/* day of the month */
4214 if (!token || (n = get_mday (token->start, token->len)) == -1)
4218 token = token->next;
/* month */
4221 if (!token || (n = get_month (token->start, token->len)) == -1)
4225 token = token->next;
/* year; struct tm counts years from 1900 */
4228 if (!token || (n = get_year (token->start, token->len)) == -1)
4231 tm.tm_year = n - 1900;
4232 token = token->next;
4234 /* get the hour/min/sec */
4235 if (!token || !get_time (token->start, token->len, &hour, &min, &sec))
4241 token = token->next;
4243 if (token && token->start && (
4244 g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4245 g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4246 /* not a valid RFC 822 time representation */
4250 /* get the timezone */
4251 if (!token || (n = get_tzone (&token)) == -1) {
4252 /* I guess we assume tz is GMT? */
4258 t = camel_mktime_utc (&tm);
4260 /* t is now GMT of the time we want, but not offset by the timezone ... */
4262 /* this should convert the time to the GMT equiv time */
4263 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
/* Token classification helpers.  A token belongs to a class when the
 * matching DATE_TOKEN_NON_* bit of its mask is clear.  In addition,
 * is_time() requires the HAS_COLON bit and is_tzone_numeric() requires
 * the HAS_SIGN bit. */
4271 #define date_token_mask(t) (((struct _date_token *) t)->mask)
4272 #define is_numeric(t) ((date_token_mask (t) & DATE_TOKEN_NON_NUMERIC) == 0)
4273 #define is_weekday(t) ((date_token_mask (t) & DATE_TOKEN_NON_WEEKDAY) == 0)
4274 #define is_month(t) ((date_token_mask (t) & DATE_TOKEN_NON_MONTH) == 0)
4275 #define is_time(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIME) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_COLON))
4276 #define is_tzone_alpha(t) ((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_ALPHA) == 0)
4277 #define is_tzone_numeric(t) (((date_token_mask (t) & DATE_TOKEN_NON_TIMEZONE_NUMERIC) == 0) && (date_token_mask (t) & DATE_TOKEN_HAS_SIGN))
4278 #define is_tzone(t) (is_tzone_alpha (t) || is_tzone_numeric (t))
/* Lenient date parser for token lists that are not in strict RFC 822
 * order (camel_header_decode_date() calls it when parse_rfc822_date()
 * yields zero).
 *
 * Each token is classified by its mask and tried, in this order, as a
 * weekday, a month name, a time, an AM/PM marker, a timezone, and finally
 * a number (four-digit year, numeric month, day of month, short year).
 * The got_* flags and already-filled tm fields stop a field from being
 * taken twice.  The result is converted with camel_mktime_utc() and then
 * shifted by the hhmm-style timezone offset.
 *
 * NOTE(review): the statements that store the parsed values, and the
 * initialisation and use of is_pm, are not visible here. */
4281 parse_broken_date (struct _date_token *tokens,
4284 gboolean got_wday, got_month, got_tzone, is_pm;
4285 gint hour, min, sec, offset, n;
4286 struct _date_token *token;
4290 memset ((gpointer) &tm, 0, sizeof (struct tm));
4291 got_wday = got_month = got_tzone = FALSE;
4297 if (is_weekday (token) && !got_wday) {
4298 if ((n = get_wday (token->start, token->len)) != -1) {
4299 d (printf ("weekday; "));
4306 if (is_month (token) && !got_month) {
4307 if ((n = get_month (token->start, token->len)) != -1) {
4308 d (printf ("month; "));
4315 if (is_time (token) && !tm.tm_hour && !tm.tm_min && !tm.tm_sec) {
4316 if (get_time (token->start, token->len, &hour, &min, &sec)) {
4317 d (printf ("time; "));
4325 if (!got_tzone && token->start && (
4326 g_ascii_strncasecmp (token->start, "AM", 2) == 0 ||
4327 g_ascii_strncasecmp (token->start, "PM", 2) == 0)) {
4328 is_pm = g_ascii_strncasecmp (token->start, "PM", 2) == 0;
4333 if (is_tzone (token) && !got_tzone) {
4334 struct _date_token *t = token;
4336 if ((n = get_tzone (&t)) != -1) {
4337 d (printf ("tzone; "));
4344 if (is_numeric (token)) {
4345 if (token->len == 4 && !tm.tm_year) {
4346 if ((n = get_year (token->start, token->len)) != -1) {
4347 d (printf ("year; "));
4348 tm.tm_year = n - 1900;
4352 /* Note: assumes MM-DD-YY ordering if '0 < MM < 12' holds true */
4353 if (!got_month && token->next && is_numeric (token->next)) {
4354 if ((n = decode_int (token->start, token->len)) > 12) {
4357 d (printf ("mon; "));
4362 } else if (!tm.tm_mday && (n = get_mday (token->start, token->len)) != -1) {
4364 d (printf ("mday; "));
4367 } else if (!tm.tm_year) {
4368 if ((n = get_year (token->start, token->len)) != -1) {
4369 d (printf ("2-digit year; "));
4370 tm.tm_year = n - 1900;
4377 d (printf ("???; "));
4381 token = token->next;
4386 t = camel_mktime_utc (&tm);
4388 /* t is now GMT of the time we want, but not offset by the timezone ... */
4390 /* this should convert the time to the GMT equiv time */
4391 t -= ((offset / 100) * 60 * 60) + (offset % 100) * 60;
4403 * camel_header_decode_date:
4404 * @str: input date string
4405 * @tz_offset: timezone offset
4407 * Decodes the rfc822 date string and saves the GMT offset into
4408 * @tz_offset if non-NULL.
4410 * Returns: the time_t representation of the date string specified by
4411 * @str or (time_t) %0 on error. If @tz_offset is non-NULL, the value
4412 * of the timezone offset will be stored.
/* Tokenizes @str with datetok(), tries the strict parse_rfc822_date()
 * first and falls back to parse_broken_date() when that yields zero.
 * A NULL @str or an empty token list is handled as a special case.
 * NOTE(review): the token list is presumably freed node by node
 * afterwards; only the list-advance statement is visible here. */
4415 camel_header_decode_date (const gchar *str,
4418 struct _date_token *token, *tokens;
4421 if (!str || !(tokens = datetok (str))) {
4428 if (!(date = parse_rfc822_date (tokens, tz_offset)))
4429 date = parse_broken_date (tokens, tz_offset);
4434 tokens = tokens->next;
/* Decodes a Content-Location header value @in.  Leading whitespace is
 * skipped, every whitespace character inside the value is dropped, and
 * the collected characters are returned as a newly allocated copy.
 * NOTE(review): the quote flag presumably marks a value that started
 * with a double quote, with the closing quote ending the scan; those
 * statements are not visible here. */
4442 camel_header_location_decode (const gchar *in)
4445 GString *out = g_string_new ("");
4448 /* Sigh. RFC2557 says:
4449 * content-location = "Content-Location:" [CFWS] URI [CFWS]
4450 * where URI is restricted to the syntax for URLs as
4451 * defined in Uniform Resource Locators [URL] until
4452 * IETF specifies other kinds of URIs.
4454 * But Netscape puts quotes around the URI when sending web
4457 * Which is required as defined in rfc2017 [3.1]. Although
4458 * outlook doesn't do this.
4460 * Since we get headers already unfolded, we need just drop
4461 * all whitespace. URL's cannot contain whitespace or quoted
4462 * characters, even when included in quotes.
4465 header_decode_lwsp (&in);
4471 while ((c = *in++)) {
4472 if (quote && c == '"')
4474 if (!camel_mime_is_lwsp (c))
4475 g_string_append_c (out, c);
4478 res = g_strdup (out->str);
4479 g_string_free (out, TRUE);
4484 /* extra rfc checks */
/* Extra RFC check: inspects the bytes of header->value and warns when a
 * non-ASCII byte is found.
 * NOTE(review): the scanning loop and what happens after the warning are
 * not visible here. */
4489 check_header (struct _camel_header_raw *header)
4493 cp = (guchar *) header->value;
4495 if (!isascii (*cp)) {
4496 w (g_warning ("Appending header violates rfc: %s: %s", header->name, header->value));
/* Splits a raw "Name: value" line into a field name and a value and
 * appends them to @list with camel_header_raw_append().
 *
 * The scan accepts field-name characters and ':' alike; fieldlen is the
 * scanned length minus one, so for a well-formed line header[fieldlen]
 * is the colon.  Whitespace after the name is skipped.  A line with an
 * empty name or with no colon at that position is reported on stdout as
 * invalid.  The name copy lives on the stack (g_alloca). */
4505 camel_header_raw_append_parse (struct _camel_header_raw **list,
4506 const gchar *header,
4509 register const gchar *in;
4514 while (camel_mime_is_fieldname (*in) || *in == ':')
4516 fieldlen = in - header - 1;
4517 while (camel_mime_is_lwsp (*in))
4519 if (fieldlen == 0 || header[fieldlen] != ':') {
4520 printf ("Invalid header line: '%s'\n", header);
4523 name = g_alloca (fieldlen + 1);
4524 memcpy (name, header, fieldlen);
4527 camel_header_raw_append (list, name, in, offset);
/* Allocates a new raw header node holding copies of name and value.
 * NOTE(review): the node is presumably linked at the tail of @list; the
 * walk starts from @list cast to a node pointer and the linking
 * statements are not visible here.
 * NOTE(review): the To / Content-type / MIME-Version decoding below
 * looks like debugging code; whether it is compiled in (for example
 * under a preprocessor guard) cannot be seen here. */
4531 camel_header_raw_append (struct _camel_header_raw **list,
4536 struct _camel_header_raw *l, *n;
4538 d (printf ("Header: %s: %s\n", name, value));
4540 n = g_malloc (sizeof (*n));
4542 n->name = g_strdup (name);
4543 n->value = g_strdup (value);
4548 l = (struct _camel_header_raw *) list;
4556 if (!g_ascii_strcasecmp (name, "To")) {
4557 printf ("- Decoding To\n");
4558 camel_header_to_decode (value);
4559 } else if (!g_ascii_strcasecmp (name, "Content-type")) {
4560 printf ("- Decoding content-type\n");
4561 camel_content_type_dump (camel_content_type_decode (value));
4562 } else if (!g_ascii_strcasecmp (name, "MIME-Version")) {
4563 printf ("- Decoding mime version\n");
4564 camel_header_mime_decode (value);
/* Looks for a node in @list whose name equals the requested name,
 * compared case-insensitively.
 * NOTE(review): presumably returns the first match, or NULL when there is
 * none; the loop and the return statements are not visible here. */
4569 static struct _camel_header_raw *
4570 header_raw_find_node (struct _camel_header_raw **list,
4573 struct _camel_header_raw *l;
4577 if (!g_ascii_strcasecmp (l->name, name))
/* Finds a header by name through header_raw_find_node() and stores the
 * node offset into *offset.
 * NOTE(review): presumably returns the header value, and writes the
 * offset only when a node was found and offset is non-NULL; those checks
 * are not visible here. */
4585 camel_header_raw_find (struct _camel_header_raw **list,
4589 struct _camel_header_raw *l;
4591 l = header_raw_find_node (list, name);
4594 *offset = l->offset;
/* Continues a header search after a previous hit.  The list is walked
 * until the node whose value pointer is @last (pointer identity, not
 * string comparison), and the search then resumes from the walk position
 * with camel_header_raw_find().  A NULL last or name is special-cased
 * up front.
 * NOTE(review): the walk presumably steps past the @last node before
 * searching again; the loop body is not visible here. */
4601 camel_header_raw_find_next (struct _camel_header_raw **list,
4606 struct _camel_header_raw *l;
4608 if (last == NULL || name == NULL)
4612 while (l && l->value != last)
4614 return camel_header_raw_find (&l, name, offset);
/* NOTE(review): presumably frees one raw header node together with its
 * name and value strings; the body is not visible here - confirm. */
4618 header_raw_free (struct _camel_header_raw *l)
/* Frees, via header_raw_free(), the nodes of @list whose name matches
 * the requested name case-insensitively.
 * NOTE(review): whether every match or only the first one is removed,
 * and the relinking of the list, are not visible here. */
4626 camel_header_raw_remove (struct _camel_header_raw **list,
4629 struct _camel_header_raw *l, *p;
4631 /* the next pointer is at the head of the structure, so this is safe */
4632 p = (struct _camel_header_raw *) list;
4635 if (!g_ascii_strcasecmp (l->name, name)) {
4637 header_raw_free (l);
/* Replaces a header: existing entries with this name are removed first,
 * then the new name/value pair is appended to @list. */
4647 camel_header_raw_replace (struct _camel_header_raw **list,
4652 camel_header_raw_remove (list, name);
4653 camel_header_raw_append (list, name, value, offset);
/* Frees nodes of @list with header_raw_free().
 * NOTE(review): presumably frees every node and resets *list; the loop
 * is not visible here. */
4657 camel_header_raw_clear (struct _camel_header_raw **list)
4659 struct _camel_header_raw *l, *n;
4663 header_raw_free (l);
/* Generates a message id of the form "<time>.<pid>.<count>.camel@<host>".
 *
 * The host part is resolved once and cached in a static variable: the
 * local host name is looked up with gethostname() and, when available,
 * its canonical name from camel_getaddrinfo() is used;
 * "localhost.localdomain" is the fallback.  count is a static counter
 * that is incremented on every call.
 *
 * NOTE(review): COUNT_LOCK/COUNT_UNLOCK presumably guard the counter and
 * the hostname cache; their call sites are not visible here. */
4670 camel_header_msgid_generate (void)
4672 static GMutex count_lock;
4673 #define COUNT_LOCK() g_mutex_lock (&count_lock)
4674 #define COUNT_UNLOCK() g_mutex_unlock (&count_lock)
4675 gchar host[MAXHOSTNAMELEN];
4677 static gint count = 0;
4680 struct addrinfo *ai = NULL, hints = { 0 };
4681 static gchar *cached_hostname = NULL;
4683 if (!cached_hostname) {
4684 retval = gethostname (host, sizeof (host));
4685 if (retval == 0 && *host) {
4686 hints.ai_flags = AI_CANONNAME;
4687 ai = camel_getaddrinfo (
4688 host, NULL, &hints, NULL, NULL);
4689 if (ai && ai->ai_canonname)
4690 name = ai->ai_canonname;
4694 name = "localhost.localdomain";
4696 cached_hostname = g_strdup (name);
4700 msgid = g_strdup_printf ("%d.%d.%d.camel@%s", (gint) time (NULL), getpid (), count++, cached_hostname);
4704 camel_freeaddrinfo (ai);
/* Table of header name / extended-regex pairs used to detect mailing
 * lists.  In each pattern, capture group 1 is the list name and capture
 * group 2 the (possibly empty) host part.  The patterns are compiled
 * once by mailing_list_init() into the regex member of each entry. */
4711 const gchar *pattern;
4713 } mail_list_magic[] = {
4714 /* List-Post: <mailto:gnome-hackers@gnome.org> */
4715 /* List-Post: <mailto:gnome-hackers> */
4716 { "List-Post", "[ \t]*<mailto:([^@>]+)@?([^ \n\t\r>]*)" },
4717 /* List-Id: GNOME stuff <gnome-hackers.gnome.org> */
4718 /* List-Id: <gnome-hackers.gnome.org> */
4719 /* List-Id: <gnome-hackers> */
4720 /* This old one wasn't very useful: { "List-Id", " *([^<]+)" },*/
4721 { "List-Id", "[^<]*<([^\\.>]+)\\.?([^ \n\t\r>]*)" },
4722 /* Mailing-List: list gnome-hackers@gnome.org; contact gnome-hackers-owner@gnome.org */
4723 { "Mailing-List", "[ \t]*list ([^@]+)@?([^ \n\t\r>;]*)" },
4724 /* Originator: gnome-hackers@gnome.org */
4725 { "Originator", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
4726 /* X-Mailing-List: <gnome-hackers@gnome.org> archive/latest/100 */
4727 /* X-Mailing-List: gnome-hackers@gnome.org */
4728 /* X-Mailing-List: gnome-hackers */
4729 /* X-Mailing-List: <gnome-hackers> */
4730 { "X-Mailing-List", "[ \t]*<?([^@>]+)@?([^ \n\t\r>]*)" },
4731 /* X-Loop: gnome-hackers@gnome.org */
4732 { "X-Loop", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
4733 /* X-List: gnome-hackers */
4734 /* X-List: gnome-hackers@gnome.org */
4735 { "X-List", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
4736 /* Sender: owner-gnome-hackers@gnome.org */
4737 /* Sender: owner-gnome-hackers */
4738 { "Sender", "[ \t]*owner-([^@]+)@?([^ @\n\t\r>]*)" },
4739 /* Sender: gnome-hackers-owner@gnome.org */
4740 /* Sender: gnome-hackers-owner */
4741 { "Sender", "[ \t]*([^@]+)-owner@?([^ @\n\t\r>]*)" },
4742 /* Delivered-To: mailing list gnome-hackers@gnome.org */
4743 /* Delivered-To: mailing list gnome-hackers */
4744 { "Delivered-To", "[ \t]*mailing list ([^@]+)@?([^ \n\t\r>]*)" },
4745 /* Return-Path: owner-gnome-hackers@gnome.org */
4746 /* Return-Path: <owner-gnome-hackers@gnome.org> */
4747 /* Return-Path: owner-gnome-hackers */
4748 /* Return-Path: <owner-gnome-hackers> */
4749 { "Return-Path", "[ \t]*<?owner-([^@>]+)@?([^ \n\t\r>]*)" },
4750 /* X-BeenThere: gnome-hackers@gnome.org */
4751 /* X-BeenThere: gnome-hackers */
4752 { "X-BeenThere", "[ \t]*([^@]+)@?([^ \n\t\r>]*)" },
4753 /* List-Unsubscribe: <mailto:gnome-hackers-unsubscribe@gnome.org> */
4754 { "List-Unsubscribe", "<mailto:(.+)-unsubscribe@([^ \n\t\r>]*)" },
/* One-time initialiser (run through g_once) that compiles every pattern
 * of mail_list_magic as a case-insensitive extended regex.  A compile
 * error is turned into text with regerror() and logged as a warning; the
 * function asserts at the end that no pattern failed. */
4758 mailing_list_init (gpointer param)
4760 gint i, errcode, failed = 0;
4762 /* precompile regex's for speed at runtime */
4763 for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4764 errcode = regcomp (&mail_list_magic[i].regex, mail_list_magic[i].pattern, REG_EXTENDED | REG_ICASE);
/* first call only asks for the size of the message buffer */
4769 len = regerror (errcode, &mail_list_magic[i].regex, NULL, 0);
4770 errstr = g_malloc0 (len + 1);
4771 regerror (errcode, &mail_list_magic[i].regex, errstr, len);
4773 g_warning ("Internal error, compiling regex failed: %s: %s", mail_list_magic[i].pattern, errstr);
4779 g_assert (failed == 0);
/* Tries to derive a mailing-list identifier from the headers in @list.
 *
 * For each mail_list_magic entry, in table order, the named header is
 * looked up and matched against the precompiled regex.  On a match with
 * capture group 1 set, a new string is built from group 1, one separator
 * byte, group 2 and a terminating NUL.
 *
 * NOTE(review): the separator byte written at mlist[len1] and the return
 * statement are not visible here. */
4785 camel_header_raw_check_mailing_list (struct _camel_header_raw **list)
4787 static GOnce once = G_ONCE_INIT;
4789 regmatch_t match[3];
4792 g_once (&once, mailing_list_init, NULL);
4794 for (i = 0; i < G_N_ELEMENTS (mail_list_magic); i++) {
4795 v = camel_header_raw_find (list, mail_list_magic[i].name, NULL);
/* reset the match slots before each regexec() call */
4796 for (j = 0; j < 3; j++) {
4797 match[j].rm_so = -1;
4798 match[j].rm_eo = -1;
4800 if (v != NULL && regexec (&mail_list_magic[i].regex, v, 3, match, 0) == 0 && match[1].rm_so != -1) {
4804 len1 = match[1].rm_eo - match[1].rm_so;
4805 len2 = match[2].rm_eo - match[2].rm_so;
/* room for both groups, one separator byte and the NUL */
4807 mlist = g_malloc (len1 + len2 + 2);
4808 memcpy (mlist, v + match[1].rm_so, len1);
4811 memcpy (mlist + len1 + 1, v + match[2].rm_so, len2);
4812 mlist[len1 + len2 + 1] = '\0';
4824 /* ok, here's the address stuff, what a mess ... */
/* Allocates a zero-filled address node of type
 * CAMEL_HEADER_ADDRESS_NONE.
 * NOTE(review): the initial reference count and the return statement are
 * not visible here. */
4825 struct _camel_header_address *
4826 camel_header_address_new (void)
4828 struct _camel_header_address *h;
4829 h = g_malloc0 (sizeof (*h));
4830 h->type = CAMEL_HEADER_ADDRESS_NONE;
/* Creates an address node of type CAMEL_HEADER_ADDRESS_NAME holding
 * private copies of the display name and of the address. */
4835 struct _camel_header_address *
4836 camel_header_address_new_name (const gchar *name,
4839 struct _camel_header_address *h;
4840 h = camel_header_address_new ();
4841 h->type = CAMEL_HEADER_ADDRESS_NAME;
4842 h->name = g_strdup (name);
4843 h->v.addr = g_strdup (addr);
/* Creates an address node of type CAMEL_HEADER_ADDRESS_GROUP holding a
 * private copy of the group name. */
4847 struct _camel_header_address *
4848 camel_header_address_new_group (const gchar *name)
4850 struct _camel_header_address *h;
4852 h = camel_header_address_new ();
4853 h->type = CAMEL_HEADER_ADDRESS_GROUP;
4854 h->name = g_strdup (name);
/* NOTE(review): presumably takes a reference on @h (the matching unref
 * function tests h->refcount); the body is not visible here - confirm. */
4859 camel_header_address_ref (struct _camel_header_address *h)
/* Drops a reference on @h.  When the reference count is 1 or less, the
 * node is torn down; for a group the member list is cleared with
 * camel_header_address_list_clear().
 * NOTE(review): the cleanup for a NAME node and the freeing of the node
 * itself are not visible here. */
4866 camel_header_address_unref (struct _camel_header_address *h)
4869 if (h->refcount <= 1) {
4870 if (h->type == CAMEL_HEADER_ADDRESS_GROUP) {
4871 camel_header_address_list_clear (&h->v.members);
4872 } else if (h->type == CAMEL_HEADER_ADDRESS_NAME) {
/* Sets the display name of @h to a private copy of the given name.
 * NOTE(review): the previous name is presumably freed first; that
 * statement is not visible here. */
4884 camel_header_address_set_name (struct _camel_header_address *h,
4889 h->name = g_strdup (name);
/* Sets the address of @h to a private copy of the given address.  This
 * is only done for nodes of type NAME or NONE (a NONE node becomes
 * NAME); the other visible path logs a warning about setting an address
 * on a group. */
4894 camel_header_address_set_addr (struct _camel_header_address *h,
4898 if (h->type == CAMEL_HEADER_ADDRESS_NAME
4899 || h->type == CAMEL_HEADER_ADDRESS_NONE) {
4900 h->type = CAMEL_HEADER_ADDRESS_NAME;
4902 h->v.addr = g_strdup (addr);
4904 g_warning ("Trying to set the address on a group");
/* Replaces the member list of @h with @group.  This is only done for
 * nodes of type GROUP or NONE (a NONE node becomes GROUP); the other
 * visible path logs a warning.  The current members are cleared first,
 * and the @group pointer is stored as-is; no reference is taken in the
 * visible code. */
4910 camel_header_address_set_members (struct _camel_header_address *h,
4911 struct _camel_header_address *group)
4914 if (h->type == CAMEL_HEADER_ADDRESS_GROUP
4915 || h->type == CAMEL_HEADER_ADDRESS_NONE) {
4916 h->type = CAMEL_HEADER_ADDRESS_GROUP;
4917 camel_header_address_list_clear (&h->v.members);
4918 /* should this ref them? */
4919 h->v.members = group;
4921 g_warning ("Trying to set the members on a name, not group");
/* Appends @member to the member list of @h.  This is only done for
 * nodes of type GROUP or NONE; a NONE node becomes GROUP. */
4927 camel_header_address_add_member (struct _camel_header_address *h,
4928 struct _camel_header_address *member)
4931 if (h->type == CAMEL_HEADER_ADDRESS_GROUP
4932 || h->type == CAMEL_HEADER_ADDRESS_NONE) {
4933 h->type = CAMEL_HEADER_ADDRESS_GROUP;
4934 camel_header_address_list_append (&h->v.members, member);
/* NOTE(review): presumably appends the list *h to the end of the list
 * *l.  The walk starts from @l cast to a node pointer, which presumably
 * relies on the next pointer being the first member of the node (as
 * noted for the raw header list).  The loop is not visible here. */
4940 camel_header_address_list_append_list (struct _camel_header_address **l,
4941 struct _camel_header_address **h)
4944 struct _camel_header_address *n = (struct _camel_header_address *) l;
/* Appends the single node @h to the list *l by delegating to
 * camel_header_address_list_append_list(). */
4953 camel_header_address_list_append (struct _camel_header_address **l,
4954 struct _camel_header_address *h)
4957 camel_header_address_list_append_list (l, &h);
/* Unreferences nodes of the list *l with camel_header_address_unref().
 * NOTE(review): presumably walks the whole list and resets *l; the loop
 * is not visible here. */
4963 camel_header_address_list_clear (struct _camel_header_address **l)
4965 struct _camel_header_address *a, *n;
4969 camel_header_address_unref (a);
4975 /* if encode is true, then the result is suitable for mailing, otherwise
4976 * the result is suitable for display only (and may not even be re-parsable) */
/* Appends the textual form of the address list @a to @out.
 *
 * A NAME node is written as "text <addr>", or as the bare address; a
 * GROUP node is written as "text: " followed by its members (through a
 * recursive call) and a closing ";".  Entries are separated by ", ".
 * Any other node type only produces a warning.
 *
 * NOTE(review): text presumably comes from camel_header_encode_phrase()
 * only when encode is set; the choice of the other source is not
 * visible here. */
4978 header_address_list_encode_append (GString *out,
4980 struct _camel_header_address *a)
4986 case CAMEL_HEADER_ADDRESS_NAME:
4988 text = camel_header_encode_phrase ((guchar *) a->name);
4992 g_string_append_printf (out, "%s <%s>", text, a->v.addr);
4994 g_string_append (out, a->v.addr);
4998 case CAMEL_HEADER_ADDRESS_GROUP:
5000 text = camel_header_encode_phrase ((guchar *) a->name);
5003 g_string_append_printf (out, "%s: ", text);
5004 header_address_list_encode_append (out, encode, a->v.members);
5005 g_string_append_printf (out, ";");
5010 g_warning ("Invalid address type");
5015 g_string_append (out, ", ");
/* Builds the form of the address list @a that is suitable for mailing
 * (encode == TRUE).  The GString wrapper is released with
 * free_segment == FALSE, so the character data survives. */
5020 camel_header_address_list_encode (struct _camel_header_address *a)
5028 out = g_string_new ("");
5029 header_address_list_encode_append (out, TRUE, a);
5031 g_string_free (out, FALSE);
/* Builds the display-only form of the address list @a
 * (encode == FALSE).  The GString wrapper is released with
 * free_segment == FALSE, so the character data survives. */
5037 camel_header_address_list_format (struct _camel_header_address *a)
5045 out = g_string_new ("");
5047 header_address_list_encode_append (out, FALSE, a);
5049 g_string_free (out, FALSE);
/* Folds an address header value @in.
 *
 * The lengths of the existing lines are checked first (starting from
 * headerlen + 2); when nothing reaches CAMEL_FOLD_SIZE a plain copy of
 * @in is returned.  Otherwise the input is unfolded if needed and
 * re-emitted word by word, where a word ends at a space: whenever the
 * next word would push the current line past CAMEL_FOLD_SIZE, a trailing
 * space is stripped and "\n\t" is inserted.
 *
 * NOTE(review): the unfolded copy is presumably freed only when
 * needunfold was set; the guards are not visible here. */
5055 camel_header_address_fold (const gchar *in,
5059 const gchar *inptr = in, *space, *p, *n;
5062 gint i, needunfold = FALSE;
5067 /* first, check to see if we even need to fold */
5068 len = headerlen + 2;
5071 n = strchr (p, '\n');
5080 if (len >= CAMEL_FOLD_SIZE)
5085 if (len < CAMEL_FOLD_SIZE)
5086 return g_strdup (in);
5088 /* we need to fold, so first unfold (if we need to), then process */
5090 inptr = in = camel_header_unfold (in);
5092 out = g_string_new ("");
5093 outlen = headerlen + 2;
5095 space = strchr (inptr, ' ');
5097 len = space - inptr + 1;
5099 len = strlen (inptr);
5102 d (printf ("next word '%.*s'\n", len, inptr));
5104 if (outlen + len > CAMEL_FOLD_SIZE) {
5105 d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5106 /* strip trailing space */
5107 if (out->len > 0 && out->str[out->len - 1] == ' ')
5108 g_string_truncate (out, out->len - 1);
5109 g_string_append (out, "\n\t");
5114 for (i = 0; i < len; i++) {
5115 g_string_append_c (out, inptr[i]);
5121 g_string_free (out, FALSE);
5124 g_free ((gchar *) in);
5129 /* simple header folding */
5130 /* will work even if the header is already folded */
/* Folds the header value @in.
 *
 * The lengths of the existing lines are checked first (starting from
 * headerlen + 2); when nothing reaches CAMEL_FOLD_SIZE a plain copy of
 * @in is returned.  Otherwise the input is unfolded if needed and
 * re-emitted word by word, where a word ends at a space or a tab.  When
 * the next word would push the line past CAMEL_FOLD_SIZE, the line is
 * broken: a trailing space or tab is moved behind the newline so that it
 * starts the continuation line, otherwise "\n\t" is inserted.  Words that
 * would exceed CAMEL_FOLD_MAX_SIZE are cut into pieces.
 *
 * NOTE(review): the unfolded copy is presumably freed only when
 * needunfold was set; the guards are not visible here. */
5132 camel_header_fold (const gchar *in,
5135 gsize len, outlen, tmplen;
5136 const gchar *inptr = in, *space, *p, *n;
5139 gint needunfold = FALSE;
5145 /* first, check to see if we even need to fold */
5146 len = headerlen + 2;
5149 n = strchr (p, '\n');
5158 if (len >= CAMEL_FOLD_SIZE)
5163 if (len < CAMEL_FOLD_SIZE)
5164 return g_strdup (in);
5166 /* we need to fold, so first unfold (if we need to), then process */
5168 inptr = in = camel_header_unfold (in);
5170 out = g_string_new ("");
5171 outlen = headerlen + 2;
5174 while (*space && *space != ' ' && *space != '\t')
5178 len = space - inptr + 1;
5180 len = space - inptr;
5182 d (printf ("next word '%.*s'\n", len, inptr));
5183 if (outlen + len > CAMEL_FOLD_SIZE) {
5184 d (printf ("outlen = %d wordlen = %d\n", outlen, len));
5185 /* strip trailing space */
5186 if (out->len > 0 && (out->str[out->len - 1] == ' ' || out->str[out->len - 1] == '\t')) {
5187 spc = out->str[out->len - 1];
5188 g_string_truncate (out, out->len - 1);
5189 g_string_append_c (out, '\n');
5190 g_string_append_c (out, spc);
5192 g_string_append (out, "\n\t");
5197 /* check for very long words, just cut them up */
5198 while (outlen + len > CAMEL_FOLD_MAX_SIZE) {
5199 tmplen = CAMEL_FOLD_MAX_SIZE - outlen;
5200 g_string_append_len (out, inptr, tmplen);
5201 g_string_append (out, "\n\t");
5208 g_string_append_len (out, inptr, len);
5213 g_string_free (out, FALSE);
5216 g_free ((gchar *) in);
5222 camel_header_unfold (const gchar *in)
5224 const gchar *inptr = in;
5230 out = g_malloc (strlen (in) + 1);
5233 while ((c = *inptr++)) {
5235 if (camel_mime_is_lwsp (*inptr)) {
5238 } while (camel_mime_is_lwsp (*inptr));