1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
3 * Copyright (C) 2000-2012 Jeffrey Stedfast
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public License
7 * as published by the Free Software Foundation; either version 2.1
8 * of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free
17 * Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
31 #include "gmime-param.h"
32 #include "gmime-common.h"
33 #include "gmime-table-private.h"
34 #include "gmime-parse-utils.h"
35 #include "gmime-iconv-utils.h"
36 #include "gmime-charset.h"
37 #include "gmime-utils.h"
38 #include "gmime-iconv.h"
41 #ifdef ENABLE_WARNINGS
45 #endif /* ENABLE_WARNINGS */
51 * SECTION: gmime-param
53 * @short_description: Content-Type and Content-Disposition parameters
54 * @see_also: #GMimeContentType
56 * A #GMimeParam is a parameter name/value pair as found on MIME
57 * header fields such as Content-Type and Content-Disposition.
/* Uppercase hexadecimal digit characters, indexed by a value in the range 0-15. */
61 static unsigned char tohex[16] = {
62 '0', '1', '2', '3', '4', '5', '6', '7',
63 '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
69 * @name: parameter name
70 * @value: parameter value
72 * Creates a new #GMimeParam node with name @name and value @value.
74 * Returns: a new parameter structure.
/* Allocates a GMimeParam with g_new() and fills in its name and value. */
77 g_mime_param_new (const char *name, const char *value)
81 param = g_new (GMimeParam, 1);
/* the caller's strings are duplicated with g_strdup(), not borrowed */
84 param->name = g_strdup (name);
85 param->value = g_strdup (value);
/* True when appending decimal digit d to x (i.e. computing x * 10 + d)
 * would exceed INT_MAX.  Both arguments are evaluated more than once. */
90 #define INT_OVERFLOW(x,d) (((x) > (INT_MAX / 10)) || ((x) == (INT_MAX / 10) && (d) > (INT_MAX % 10)))
/* Parses a run of decimal digits starting at *in, accumulating the value
 * in n, and stores the final scan position back into *in. */
93 decode_int (const char **in)
95 const unsigned char *inptr;
/* scanning through an unsigned char pointer keeps the isdigit() argument non-negative */
100 inptr = (const unsigned char *) *in;
101 while (isdigit ((int) *inptr)) {
102 digit = (*inptr - '0');
/* one more digit would not fit in an int; the inner loop tests for the
 * remaining digits (its body is not visible in this listing) */
103 if (INT_OVERFLOW (n, digit)) {
104 while (isdigit ((int) *inptr))
109 n = (n * 10) + digit;
/* report how far the scan got */
114 *in = (const char *) inptr;
/* Reads a quoted string from *in, skipping leading whitespace first, and
 * copies it with g_strndup().
 * NOTE(review): lines are missing from this listing, so the handling of
 * the opening quote, of the `unescape` flag and of the return value
 * cannot be confirmed here. */
120 decode_quoted_string (const char **in)
122 const char *start, *inptr = *in;
123 char *outptr, *out = NULL;
124 gboolean unescape = FALSE;
126 decode_lwsp (&inptr);
/* scan up to the closing quote or the end of the input, whichever comes first */
135 while (*inptr && *inptr != '"') {
/* the character is consumed by the test itself, whether or not it is a backslash */
136 if (*inptr++ == '\\') {
144 out = g_strndup (start, (size_t) (inptr - start));
147 /* string wasn't properly quoted */
148 out = g_strndup (start, (size_t) (inptr - start));
/* second pass over the copy: the read and write cursors both start at
 * `out`, so the string is rewritten in place */
154 inptr = outptr = out;
158 *outptr++ = *inptr++;
/* Extracts a token from *in after skipping leading whitespace and returns
 * it as a newly allocated copy made with g_strndup().
 * NOTE(review): two alternative scans are visible below (one over
 * is_ttoken() characters, one up to the next ';'); whatever selects
 * between them is on lines missing from this listing. */
168 decode_token (const char **in)
170 const char *inptr = *in;
173 decode_lwsp (&inptr);
/* scan: consume characters accepted by is_ttoken() */
177 while (is_ttoken (*inptr))
180 /* Broken mail clients like to make our lives difficult. Scan
181 * for a ';' instead of trusting that the client followed the
183 while (*inptr && *inptr != ';')
186 /* Scan backwards over any trailing lwsp */
187 while (inptr > start && is_lwsp (inptr[-1]))
/* the token is the span [start, inptr) */
193 return g_strndup (start, (size_t) (inptr - start));
/* Decodes a parameter value at *in by delegating to
 * decode_quoted_string() or decode_token().
 * NOTE(review): the condition guarding the quoted-string branch is not
 * visible in this listing. */
200 decode_value (const char **in)
202 const char *inptr = *in;
/* whitespace is skipped on the local copy only; the helpers below are
 * handed the caller's `in` and skip whitespace themselves */
204 decode_lwsp (&inptr);
208 return decode_quoted_string (in);
209 } else if (is_ttoken (*inptr)) {
210 return decode_token (in);
/* unless STRICT_PARSER is defined, the remaining case is also handed to decode_token() */
213 #ifndef STRICT_PARSER
214 return decode_token (in);
220 /* This function is basically the same as decode_token()
221 * except that it will not accept *'s which have a special
222 * meaning for rfc2184 params */
224 decode_param_token (const char **in)
226 const char *inptr = *in;
229 decode_lwsp (&inptr);
/* stop at the first '*' or at the first character is_ttoken() rejects */
232 while (is_ttoken (*inptr) && *inptr != '*')
/* the token is the span [start, inptr), returned as a newly allocated copy */
236 return g_strndup (start, (size_t) (inptr - start));
/* Parses the name side of a parameter and recognises the rfc2184 forms
 * listed in the comments below (param*=, param*#= and param*#*=).
 * The name is read with decode_param_token(), which stops at a '*'.
 * NOTE(review): only the store to *part is visible in this listing; the
 * stores to *paramp and *encoded and the return of `is_rfc2184` are
 * presumably on the missing lines. */
243 decode_rfc2184_param (const char **in, char **paramp, int *part, gboolean *encoded)
245 gboolean is_rfc2184 = FALSE;
246 const char *inptr = *in;
252 param = decode_param_token (&inptr);
254 decode_lwsp (&inptr);
260 decode_lwsp (&inptr);
262 /* form := param*=value */
265 /* form := param*#=value or param*#*=value */
/* the '#' in the forms above is a decimal part number */
266 *part = decode_int (&inptr);
268 decode_lwsp (&inptr);
270 /* form := param*#*=value */
273 decode_lwsp (&inptr);
/* Decodes one name/value pair starting at *in.  For values that are not
 * rfc2184 parameters, two kinds of broken input are repaired: values
 * containing rfc2047 encoded-words and raw 8-bit values that are not valid
 * UTF-8. */
288 decode_param (const char **in, char **paramp, char **valuep, int *id, gboolean *encoded)
290 gboolean is_rfc2184 = FALSE;
291 const char *inptr = *in;
292 char *param, *value = NULL;
/* NOTE(review): "¶m" on the next line looks like a mis-encoded "&param"
 * (decode_rfc2184_param() takes a char ** there and `param` is the local
 * declared above) -- confirm against the pristine file */
295 is_rfc2184 = decode_rfc2184_param (&inptr, ¶m, id, encoded);
299 value = decode_value (&inptr);
/* the repairs below only apply to plain (non-rfc2184) values */
301 if (!is_rfc2184 && value) {
/* "=?" is used as a cheap hint that an rfc2047 encoded-word may be present */
302 if (strstr (value, "=?") != NULL) {
303 /* We (may) have a broken param value that is rfc2047
304 * encoded. Since both Outlook and Netscape/Mozilla do
305 * this, we should handle this case.
308 if ((val = g_mime_utils_header_decode_text (value))) {
314 if (!g_utf8_validate (value, -1, NULL)) {
315 /* A (broken) mailer has sent us an unencoded 8bit value.
316 * Attempt to save it by assuming it's in the user's
317 * locale and converting to UTF-8 */
319 if ((val = g_mime_iconv_locale_to_utf8 (value))) {
323 d(g_warning ("Failed to convert %s param value (\"%s\") to UTF-8: %s",
324 param, value, g_strerror (errno)));
/* both a name and a value must have been decoded for the pair to be accepted */
330 if (param && value) {
/* One segment of an rfc2184 parameter value.  The members are not shown
 * in this listing; code elsewhere in the file reads and writes `id` and
 * `value`. */
343 struct _rfc2184_part {
/* Collects the segments of one rfc2184 parameter; nodes are chained
 * through `next`.  Other members used elsewhere in the file (declarations
 * not shown in this listing): parts, charset, lang and param. */
348 struct _rfc2184_param {
349 struct _rfc2184_param *next;
357 rfc2184_sort_cb (const void *v0, const void *v1)
359 const struct _rfc2184_part *p0 = *((struct _rfc2184_part **) v0);
360 const struct _rfc2184_part *p1 = *((struct _rfc2184_part **) v1);
362 return p0->id - p1->id;
/* Numeric value of hexadecimal digit character c.  The argument is not
 * validated and is evaluated more than once; the caller visible in this
 * file checks it with isxdigit() first. */
365 #define HEXVAL(c) (isdigit (c) ? (c) - '0' : tolower (c) - 'a' + 10)
/* Copies @len bytes of @in to @out, replacing an escape that is followed
 * by two hexadecimal digits with the single byte those digits encode.
 * Returns the number of bytes written to @out.
 * NOTE(review): the test that recognises the escape introducer
 * (presumably '%') is on a line missing from this listing. */
368 hex_decode (const char *in, size_t len, char *out)
370 register const unsigned char *inptr = (const unsigned char *) in;
371 register unsigned char *outptr = (unsigned char *) out;
372 const unsigned char *inend = inptr + len;
374 while (inptr < inend) {
/* looks up to two bytes ahead without checking inend; the callers
 * visible in this file pass the strlen() of a NUL-terminated string */
376 if (isxdigit (inptr[1]) && isxdigit (inptr[2])) {
377 *outptr++ = HEXVAL (inptr[1]) * 16 + HEXVAL (inptr[2]);
/* otherwise the byte is copied through unchanged */
380 *outptr++ = *inptr++;
382 *outptr++ = *inptr++;
/* distance the write cursor has travelled from the start of @out */
387 return ((char *) outptr) - out;
/* Parses the charset'language' prefix of an rfc2184 encoded value and
 * returns the charset name canonicalised by g_mime_charset_canon_name().
 * A newly allocated copy of the language tag is stored in *langp.
 * NOTE(review): rfc2184_decode() passes NULL for @langp, so that store is
 * presumably guarded on a line missing from this listing; whether *in is
 * advanced past the prefix is likewise not visible here. */
391 rfc2184_param_charset (const char **in, char **langp)
393 const char *lang, *inptr = *in;
/* the charset name runs up to the first single quote */
400 while (*inptr != '\0' && *inptr != '\'')
/* temporary stack copy of the charset name, taken from the start of the input */
407 charset = g_alloca (len + 1);
408 memcpy (charset, *in, len);
/* the language tag runs up to the second single quote */
412 while (*inptr != '\0' && *inptr != '\'')
415 if (*inptr == '\'') {
417 *langp = g_strndup (lang, (size_t) (inptr - lang));
424 return g_mime_charset_canon_name (charset);
/* Converts @inlen bytes of @in to UTF-8, treating them as text in @charset.
 * NOTE(review): the statements that produce the return value, and what
 * becomes of @in afterwards, are on lines missing from this listing. */
428 charset_convert (const char *charset, char *in, size_t inlen)
/* presumably records that the locale charset is already the one in use;
 * the assignment that sets it is not visible here */
430 gboolean locale = FALSE;
434 if (!charset || !g_ascii_strcasecmp (charset, "UTF-8") || !g_ascii_strcasecmp (charset, "us-ascii")) {
435 /* we shouldn't need any charset conversion here... */
436 if (g_utf8_validate (in, inlen, NULL))
/* the input claimed to need no conversion but is not valid UTF-8: fall
 * back to the locale charset */
439 charset = g_mime_locale_charset ();
443 /* need charset conversion */
444 cd = g_mime_iconv_open ("UTF-8", charset);
/* the named charset could not be opened: retry with the locale charset,
 * unless `locale` says otherwise */
445 if (cd == (iconv_t) -1 && !locale) {
446 charset = g_mime_locale_charset ();
447 cd = g_mime_iconv_open ("UTF-8", charset);
450 if (cd != (iconv_t) -1) {
451 result = g_mime_iconv_strndup (cd, in, inlen);
452 g_mime_iconv_close (cd);
/* Decodes a singleton (non-continued) rfc2184 encoded parameter value of
 * the form charset'language'percent-encoded-text.
 *
 * @value: the raw encoded value
 *
 * The charset prefix is parsed by rfc2184_param_charset(), the remainder
 * is hex-decoded into a stack buffer, and a heap copy of the decoded bytes
 * is handed to charset_convert().
 *
 * Returns: whatever charset_convert() yields for the decoded bytes. */
static char *
rfc2184_decode (const char *value)
{
	const char *inptr = value;
	const char *charset;
	char *decoded;
	size_t len;

	charset = rfc2184_param_charset (&inptr, NULL);

	/* NOTE(review): the stack allocation grows with the length of the
	 * (untrusted) input value */
	len = strlen (inptr);
	decoded = g_alloca (len + 1);
	len = hex_decode (inptr, len, decoded);

	/* A "%00" escape decodes to an embedded NUL.  g_strdup() would stop
	 * copying there and return a buffer shorter than len, which
	 * charset_convert() would then read past.  g_strndup() always
	 * allocates len + 1 bytes (padding with NULs), so the length passed
	 * along never exceeds the allocation. */
	return charset_convert (charset, g_strndup (decoded, len), len);
}
/* Allocates a new part and appends it to @rfc2184->parts.
 * NOTE(review): the hex-decoding path below is presumably taken only when
 * @encoded is set; the condition, the alternative path and the store of
 * @id are on lines missing from this listing. */
481 rfc2184_param_add_part (struct _rfc2184_param *rfc2184, char *value, int id, gboolean encoded)
483 struct _rfc2184_part *part;
486 part = g_new (struct _rfc2184_part, 1);
487 g_ptr_array_add (rfc2184->parts, part);
/* the buffer is sized from the encoded length, len + 1 bytes, and filled by hex_decode() */
491 len = strlen (value);
492 part->value = g_malloc (len + 1);
493 hex_decode (value, len, part->value);
/* Creates the bookkeeping node for an rfc2184 parameter: an empty parts
 * array, the charset/language taken from @value (or NULL), the first part,
 * and a GMimeParam that takes @name as its name and starts with a NULL
 * value. */
500 static struct _rfc2184_param *
501 rfc2184_param_new (char *name, char *value, int id, gboolean encoded)
503 struct _rfc2184_param *rfc2184;
504 const char *inptr = value;
506 rfc2184 = g_new (struct _rfc2184_param, 1);
507 rfc2184->parts = g_ptr_array_new ();
508 rfc2184->next = NULL;
/* NOTE(review): the condition choosing between parsing the charset prefix
 * and clearing both fields is not visible in this listing */
511 rfc2184->charset = rfc2184_param_charset (&inptr, &rfc2184->lang);
513 rfc2184->charset = NULL;
514 rfc2184->lang = NULL;
/* inptr still equal to value means no prefix was consumed, so the value
 * is used as-is; otherwise a copy of the remainder becomes the first part */
517 if (inptr == value) {
518 rfc2184_param_add_part (rfc2184, value, id, encoded);
520 rfc2184_param_add_part (rfc2184, g_strdup (inptr), id, encoded);
/* @name is stored directly (not copied); the value is filled in later */
524 rfc2184->param = g_new (GMimeParam, 1);
525 rfc2184->param->next = NULL;
526 rfc2184->param->name = name;
527 rfc2184->param->value = NULL;
/* Parses a ';'-separated parameter list into GMimeParam nodes.  Parts of
 * multi-part rfc2184 parameters are collected per name in a hash table
 * while parsing and are joined and charset-converted in a second pass
 * after the whole input has been read. */
533 decode_param_list (const char *in)
535 struct _rfc2184_param *rfc2184, *list, *t;
536 GMimeParam *param, *params, *tail;
537 struct _rfc2184_part *part;
538 GHashTable *rfc2184_hash;
539 const char *inptr = in;
/* NOTE(review): "¶ms" on the next line looks like a mis-encoded "&params"
 * -- confirm against the pristine file.  Casting the address of the head
 * pointer to a node pointer presumably relies on `next` being the first
 * member of GMimeParam (its declaration is not in this file) */
547 tail = (GMimeParam *) ¶ms;
/* same trick for the rfc2184 list */
550 t = (struct _rfc2184_param *) &list;
/* created without destroy notifiers, so the table owns neither its keys nor its values */
551 rfc2184_hash = g_hash_table_new (g_mime_strcase_hash, g_mime_strcase_equal);
553 decode_lwsp (&inptr);
556 /* invalid format? */
557 if (!decode_param (&inptr, &name, &value, &id, &encoded)) {
558 decode_lwsp (&inptr);
567 /* we have a multipart rfc2184 param */
/* first part seen for this name: create the collector and index it by the
 * name string held by its GMimeParam */
568 if (!(rfc2184 = g_hash_table_lookup (rfc2184_hash, name))) {
569 rfc2184 = rfc2184_param_new (name, value, id, encoded);
570 param = rfc2184->param;
574 g_hash_table_insert (rfc2184_hash, param->name, rfc2184);
/* a later part of a name that is already being collected */
579 rfc2184_param_add_part (rfc2184, value, id, encoded);
583 param = g_new (GMimeParam, 1);
588 /* singleton encoded rfc2184 param value */
589 param->value = rfc2184_decode (value);
592 /* normal parameter value */
593 param->value = value;
600 decode_lwsp (&inptr);
/* keep going for as long as another ';' separator follows */
601 } while (*inptr++ == ';');
603 g_hash_table_destroy (rfc2184_hash);
/* second pass: assemble the value of every collected rfc2184 parameter */
606 while (rfc2184 != NULL) {
609 param = rfc2184->param;
610 gvalue = g_string_new ("");
/* parts may arrive in any order; join them in ascending id order */
612 g_ptr_array_sort (rfc2184->parts, rfc2184_sort_cb);
613 for (i = 0; i < rfc2184->parts->len; i++) {
614 part = rfc2184->parts->pdata[i];
615 g_string_append (gvalue, part->value);
616 g_free (part->value);
620 g_ptr_array_free (rfc2184->parts, TRUE);
/* the joined bytes are converted to UTF-8 as one unit */
622 param->value = charset_convert (rfc2184->charset, gvalue->str, gvalue->len);
/* FALSE: only the GString wrapper is released, not its character buffer */
623 g_string_free (gvalue, FALSE);
625 g_free (rfc2184->lang);
635 * g_mime_param_new_from_string:
638 * Creates a parameter list based on the input string.
640 * Returns: a #GMimeParam structure based on @str.
/* Public entry point for parsing a parameter list: validates @str and
 * delegates to decode_param_list(). */
643 g_mime_param_new_from_string (const char *str)
/* a NULL @str yields NULL */
645 g_return_val_if_fail (str != NULL, NULL);
647 return decode_param_list (str);
652 * g_mime_param_destroy:
653 * @param: Mime param list to destroy
655 * Releases all memory used by this mime param back to the Operating
/* Frees the name and value strings held by a param node.
 * NOTE(review): the doc comment describes @param as a list; the traversal
 * and the freeing of the nodes themselves are on lines missing from this
 * listing. */
659 g_mime_param_destroy (GMimeParam *param)
665 g_free (param->name);
666 g_free (param->value);
675 * @param: a #GMimeParam node
677 * Gets the next #GMimeParam node in the list.
679 * Returns: the next #GMimeParam node in the list.
/* Accessor for the node that follows @param in the list. */
682 g_mime_param_next (const GMimeParam *param)
/* a NULL @param yields NULL */
684 g_return_val_if_fail (param != NULL, NULL);
691 * g_mime_param_get_name:
692 * @param: a #GMimeParam
694 * Gets the name of the parameter.
696 * Returns: the name of the parameter.
/* Accessor for the name of @param. */
699 g_mime_param_get_name (const GMimeParam *param)
/* a NULL @param yields NULL */
701 g_return_val_if_fail (param != NULL, NULL);
708 * g_mime_param_get_value:
709 * @param: a #GMimeParam
711 * Gets the value of the parameter.
713 * Returns: the value of the parameter.
/* Accessor for the value of @param. */
716 g_mime_param_get_value (const GMimeParam *param)
/* a NULL @param yields NULL */
718 g_return_val_if_fail (param != NULL, NULL);
725 * g_mime_param_append:
726 * @params: param list
727 * @name: new param name
728 * @value: new param value
730 * Appends a new parameter with name @name and value @value to the
731 * parameter list @params.
733 * Returns: a param list with the new param of name @name and value
734 * @value appended to the list of params @params.
/* Builds a new node from @name and @value with g_mime_param_new() so that
 * it can be appended to @params. */
737 g_mime_param_append (GMimeParam *params, const char *name, const char *value)
739 GMimeParam *param, *p;
/* a NULL @name or @value leaves the list untouched: @params is returned as it was */
741 g_return_val_if_fail (name != NULL, params);
742 g_return_val_if_fail (value != NULL, params);
744 param = g_mime_param_new (name, value);
758 * g_mime_param_append_param:
759 * @params: param list
760 * @param: param to append
762 * Appends @param to the param list @params.
764 * Returns: a param list with the new param @param appended to the list
/* Appends an existing node @param to the list @params. */
768 g_mime_param_append_param (GMimeParam *params, GMimeParam *param)
/* a NULL @param leaves the list untouched: @params is returned as it was */
772 g_return_val_if_fail (param != NULL, params);
785 /* FIXME: I wrote this in a quick & dirty fashion - it may not be 100% correct */
/* Produces the rfc2184 encoded form of the parameter value @in when
 * encoding is needed: a charset'' prefix followed by the text, with every
 * byte that is not an attribute character written as a %XX escape.
 * NOTE(review): the test that decides whether encoding is needed, and the
 * stores to *encoded, are on lines missing from this listing. */
787 encode_param (const char *in, gboolean *encoded)
789 register const unsigned char *inptr = (const unsigned char *) in;
790 const unsigned char *instart = inptr;
791 iconv_t cd = (iconv_t) -1;
792 const char *charset = NULL;
/* the preliminary scan covers at most GMIME_FOLD_LEN bytes of the input */
800 while (*inptr && ((inptr - instart) < GMIME_FOLD_LEN)) {
/* result for the path on which the value is returned unencoded */
807 return g_strdup (in);
810 charset = g_mime_charset_best (in, strlen (in));
/* fallback charset */
813 charset = "iso-8859-1";
/* the input is converted from UTF-8, so a converter is only needed for other charsets */
815 if (g_ascii_strcasecmp (charset, "UTF-8") != 0)
816 cd = g_mime_iconv_open (charset, "UTF-8");
818 if (cd != (iconv_t) -1) {
819 outbuf = g_mime_iconv_strdup (cd, in);
820 g_mime_iconv_close (cd);
821 if (outbuf == NULL) {
/* from here on the converted text is what gets encoded */
825 inptr = (const unsigned char *) outbuf;
832 /* FIXME: set the 'language' as well, assuming we can get that info...? */
833 out = g_string_new ("");
/* the language field between the two quotes is left empty */
834 g_string_append_printf (out, "%s''", charset);
836 while ((c = *inptr++)) {
837 if (!is_attrchar (c))
/* high nibble first, then low nibble, as uppercase hex digits */
838 g_string_append_printf (out, "%%%c%c", tohex[(c >> 4) & 0xf], tohex[c & 0xf]);
840 g_string_append_c (out, c);
/* FALSE: the character data is kept and only the GString wrapper is freed */
846 g_string_free (out, FALSE);
/* Appends @in to @out as a double-quoted string, putting a backslash in
 * front of every double quote and backslash. */
853 g_string_append_len_quoted (GString *out, const char *in, size_t len)
855 register const char *inptr;
/* opening quote */
858 g_string_append_c (out, '"');
863 while (inptr < inend) {
/* these two characters need an escaping backslash inside a quoted string */
864 if ((*inptr == '"') || *inptr == '\\')
865 g_string_append_c (out, '\\');
867 g_string_append_c (out, *inptr);
/* closing quote */
872 g_string_append_c (out, '"');
/* Writes parameters to @out as "; name=value" pairs, optionally folding
 * lines.  Values are encoded with encode_param() first; a pair that is too
 * long for one line is split into numbered rfc2184 continuations
 * (name*0=, name*1=, ...).
 * NOTE(review): the loop over the list and the bookkeeping of `used`,
 * `here` and `quote` are partly on lines missing from this listing. */
876 param_list_format (GString *out, const GMimeParam *param, gboolean fold)
881 gboolean encoded = FALSE;
/* if encoding fails, fall back to a plain copy of the raw value */
892 if (!(value = encode_param (param->value, &encoded))) {
893 w(g_warning ("appending parameter %s=%s violates rfc2184",
894 param->name, param->value));
895 value = g_strdup (param->value);
/* look for a character that is not an attribute character, or that is whitespace */
901 for (ch = value; *ch; ch++) {
902 if (!is_attrchar (*ch) || is_lwsp (*ch))
907 nlen = strlen (param->name);
908 vlen = strlen (value);
/* when folding, start a new line if the pair would not fit on the current one */
910 if (fold && (used + nlen + vlen + quote > GMIME_FOLD_LEN - 2)) {
911 g_string_append (out, ";\n\t");
915 g_string_append (out, "; ");
/* the pair alone is longer than a line */
920 if (nlen + vlen + quote > GMIME_FOLD_LEN - 2) {
921 /* we need to do special rfc2184 parameter wrapping */
/* NOTE(review): this is size_t arithmetic, so a name longer than
 * GMIME_FOLD_LEN - 6 would make the subtraction wrap around -- confirm
 * that names are bounded */
922 size_t maxlen = GMIME_FOLD_LEN - (nlen + 6);
927 inend = value + vlen;
929 while (inptr < inend) {
/* tentative end of this chunk: at most maxlen bytes, never past the end of the value */
930 char *ptr = inptr + MIN ((size_t) (inend - inptr), maxlen);
932 if (encoded && ptr < inend) {
933 /* be careful not to break an encoded char (ie %20) */
937 for ( ; j > 0 && q > inptr && *q != '%'; j--, q--);
944 g_string_append (out, ";\n\t");
946 g_string_append (out, "; ");
/* continuation header: name*N= for a plain chunk, name*N*= for an encoded one */
952 g_string_append_printf (out, "%s*%d%s=", param->name,
953 i++, encoded ? "*" : "");
/* encoded values and values that need no quoting are written verbatim */
955 if (encoded || !quote)
956 g_string_append_len (out, inptr, (size_t) (ptr - inptr));
958 g_string_append_len_quoted (out, inptr, (size_t) (ptr - inptr));
960 used += (out->len - here);
/* the pair fits on one line: name= for a plain value, name*= for an encoded one */
965 g_string_append_printf (out, "%s%s=", param->name, encoded ? "*" : "");
967 if (encoded || !quote)
968 g_string_append_len (out, value, vlen);
970 g_string_append_len_quoted (out, value, vlen);
972 used += (out->len - here);
981 g_string_append_c (out, '\n');
986 * g_mime_param_write_to_string:
987 * @param: MIME Param list
988 * @fold: specifies whether or not to fold headers
989 * @str: output string
991 * Assumes the output string contains only the Content-* header and
992 * its immediate value.
994 * Writes the params out to the string @str.
/* Public entry point for serialising a parameter list: validates @str and
 * delegates to param_list_format(). */
997 g_mime_param_write_to_string (const GMimeParam *param, gboolean fold, GString *str)
/* nothing is written when @str is NULL */
999 g_return_if_fail (str != NULL);
1001 param_list_format (str, param, fold);