gmime/gmime-parse-utils.c

   1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
   2 /*  GMime
   3  *  Copyright (C) 2000-2012 Jeffrey Stedfast
   4  *
   5  *  This library is free software; you can redistribute it and/or
   6  *  modify it under the terms of the GNU Lesser General Public License
   7  *  as published by the Free Software Foundation; either version 2.1
   8  *  of the License, or (at your option) any later version.
   9  *
  10  *  This library is distributed in the hope that it will be useful,
  11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  *  Lesser General Public License for more details.
  14  *
  15  *  You should have received a copy of the GNU Lesser General Public
  16  *  License along with this library; if not, write to the Free
  17  *  Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
  18  *  02110-1301, USA.
  19  */
  20
  21
  22 #ifdef HAVE_CONFIG_H
  23 #include <config.h>
  24 #endif
  25
  26 #include <glib.h>
  27
  28 #include "gmime-table-private.h"
  29 #include "gmime-parse-utils.h"
  30
  31
  32 #ifdef ENABLE_WARNINGS
  33 #define w(x) x
  34 #else
  35 #define w(x)
  36 #endif /* ENABLE_WARNINGS */
  37
  38 #define d(x)
  39
  40
  41 /**
  42  * g_mime_parse_content_type:
  43  * @in: address of input text string
  44  * @type: address of the 'type' output string
  45  * @subtype: address of the 'subtype' output string
  46  *
  47  * Decodes the simple Content-Type type/subtype tokens and updates @in
  48  * to point to the first char after the end of the subtype.
  49  *
  50  * Returns: %TRUE if the string was successfully parsed or %FALSE
  51  * otherwise.
  52  **/
  53 gboolean
  54 g_mime_parse_content_type (const char **in, char **type, char **subtype)
  55 {
  56         register const char *inptr;
  57         const char *start = *in;
  58
  59         decode_lwsp (&start);
  60         inptr = start;
  61
  62         /* decode the type */
  63         while (*inptr && is_ttoken (*inptr))
  64                 inptr++;
  65
  66         *type = g_strndup (start, (size_t) (inptr - start));
  67
  68         start = inptr;
  69         decode_lwsp (&start);
  70
  71         /* check for type/subtype delimeter */
  72         if (*start++ != '/') {
  73                 g_free (*type);
  74                 *subtype = NULL;
  75                 *type = NULL;
  76                 return FALSE;
  77         }
  78
  79         decode_lwsp (&start);
  80         inptr = start;
  81
  82         /* decode the subtype */
  83         while (*inptr && is_ttoken (*inptr))
  84                 inptr++;
  85
  86         /* check that the subtype exists */
  87         if (inptr == start) {
  88                 g_free (*type);
  89                 *subtype = NULL;
  90                 *type = NULL;
  91                 return FALSE;
  92         }
  93
  94         *subtype = g_strndup (start, (size_t) (inptr - start));
  95
  96         /* update the input string pointer */
  97         *in = inptr;
  98
  99         return TRUE;
 100 }
 101
 102
 103 /**
 104  * g_mime_decode_lwsp:
 105  * @in: address of input text string
 106  *
 107  * Skips past any LWSP or rfc822 comments in *@in and updates @in.
 108  **/
 109 void
 110 g_mime_decode_lwsp (const char **in)
 111 {
 112         const char *inptr = *in;
 113
 114         while (*inptr && (*inptr == '(' || is_lwsp (*inptr))) {
 115                 while (*inptr && is_lwsp (*inptr))
 116                         inptr++;
 117
 118                 /* skip over any comments */
 119                 if (*inptr == '(') {
 120                         int depth = 1;
 121
 122                         inptr++;
 123                         while (*inptr && depth) {
 124                                 if (*inptr == '\\' && *(inptr + 1))
 125                                         inptr++;
 126                                 else if (*inptr == '(')
 127                                         depth++;
 128                                 else if (*inptr == ')')
 129                                         depth--;
 130
 131                                 inptr++;
 132                         }
 133                 }
 134         }
 135
 136         *in = inptr;
 137 }
 138
 139 static const char *
 140 decode_quoted_string (const char **in)
 141 {
 142         register const char *inptr = *in;
 143         const char *qstring = NULL;
 144
 145         if (*inptr == '"') {
 146                 qstring = inptr;
 147
 148                 inptr++;
 149                 while (*inptr && *inptr != '"') {
 150                         if (*inptr == '\\')
 151                                 inptr++;
 152
 153                         if (*inptr)
 154                                 inptr++;
 155                 }
 156
 157                 if (*inptr == '"')
 158                         inptr++;
 159
 160                 *in = inptr;
 161         }
 162
 163         return qstring;
 164 }
 165
 166 static const char *
 167 decode_atom (const char **in)
 168 {
 169         register const char *inptr = *in;
 170         const char *atom = NULL;
 171
 172         if (!is_atom (*inptr))
 173                 return NULL;
 174
 175         atom = inptr++;
 176         while (is_atom (*inptr))
 177                 inptr++;
 178
 179         *in = inptr;
 180
 181         return atom;
 182 }
 183
 184
 185 /**
 186  * g_mime_decode_word:
 187  * @in: address of input text string
 188  *
 189  * Extracts the next rfc822 'word' token.
 190  *
 191  * Returns: the next rfc822 'word' token or %NULL if non exist.
 192  **/
 193 const char *
 194 g_mime_decode_word (const char **in)
 195 {
 196         const char *inptr = *in;
 197
 198         decode_lwsp (&inptr);
 199         if (*inptr == '"') {
 200                 *in = inptr;
 201                 return decode_quoted_string (in);
 202         } else {
 203                 *in = inptr;
 204                 return decode_atom (in);
 205         }
 206 }
 207
 208 static gboolean
 209 decode_subliteral (const char **in, GString *domain)
 210 {
 211         const char *inptr = *in;
 212         gboolean got = FALSE;
 213
 214         while (*inptr && *inptr != '.' && *inptr != ']') {
 215                 if (is_dtext (*inptr)) {
 216                         g_string_append_c (domain, *inptr);
 217                         inptr++;
 218                         got = TRUE;
 219                 } else if (is_lwsp (*inptr))
 220                         decode_lwsp (&inptr);
 221                 else
 222                         break;
 223         }
 224
 225         *in = inptr;
 226
 227         return got;
 228 }
 229
 230 static void
 231 decode_domain_literal (const char **in, GString *domain)
 232 {
 233         const char *inptr = *in;
 234
 235         decode_lwsp (&inptr);
 236         while (*inptr && *inptr != ']') {
 237                 if (decode_subliteral (&inptr, domain) && *inptr == '.') {
 238                         g_string_append_c (domain, *inptr);
 239                         inptr++;
 240                 } else if (*inptr != ']') {
 241                         w(g_warning ("Malformed domain-literal, unexpected char '%c': %s",
 242                                      *inptr, *in));
 243
 244                         /* try and skip to the next char ?? */
 245                         inptr++;
 246                 }
 247         }
 248
 249         *in = inptr;
 250 }
 251
 252
 253 /**
 254  * g_mime_decode_domain:
 255  * @in: address of input text string
 256  * @domain: a #GString to decode the domain into
 257  *
 258  * Extracts the next rfc822 'domain' token and appends it to @domain.
 259  *
 260  * Returns: %TRUE if an rfc822 'domain' token was decoded or %FALSE
 261  * otherwise.
 262  **/
 263 gboolean
 264 g_mime_decode_domain (const char **in, GString *domain)
 265 {
 266         const char *inptr, *save, *atom;
 267         size_t initial = domain->len;
 268
 269         inptr = *in;
 270         while (inptr && *inptr) {
 271                 decode_lwsp (&inptr);
 272                 if (*inptr == '[') {
 273                         /* domain literal */
 274                         g_string_append_c (domain, '[');
 275                         inptr++;
 276
 277                         decode_domain_literal (&inptr, domain);
 278
 279                         if (*inptr == ']') {
 280                                 g_string_append_c (domain, ']');
 281                                 inptr++;
 282                         } else
 283                                 w(g_warning ("Missing ']' in domain-literal: %s", *in));
 284                 } else {
 285                         if (!(atom = decode_atom (&inptr))) {
 286                                 w(g_warning ("Unexpected char '%c' in domain: %s", *inptr, *in));
 287                                 /* remove the last '.' */
 288                                 if (domain->len && domain->str[domain->len - 1] == '.')
 289                                         g_string_truncate (domain, domain->len - 1);
 290                                 break;
 291                         }
 292
 293                         g_string_append_len (domain, atom, (size_t) (inptr - atom));
 294                 }
 295
 296                 save = inptr;
 297                 decode_lwsp (&inptr);
 298                 if (*inptr != '.') {
 299                         inptr = save;
 300                         break;
 301                 }
 302
 303                 g_string_append_c (domain, '.');
 304                 inptr++;
 305         }
 306
 307         *in = inptr;
 308
 309         return domain->len > initial;
 310 }