Initialize the gmime for upstream
[platform/upstream/gmime.git] / gmime / gmime-parser.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*  GMime
3  *  Copyright (C) 2000-2012 Jeffrey Stedfast
4  *
5  *  This library is free software; you can redistribute it and/or
6  *  modify it under the terms of the GNU Lesser General Public License
7  *  as published by the Free Software Foundation; either version 2.1
8  *  of the License, or (at your option) any later version.
9  *
10  *  This library is distributed in the hope that it will be useful,
11  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
12  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13  *  Lesser General Public License for more details.
14  *
15  *  You should have received a copy of the GNU Lesser General Public
16  *  License along with this library; if not, write to the Free
17  *  Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
18  *  02110-1301, USA.
19  */
20
21
22 #ifdef HAVE_CONFIG_H
23 #include <config.h>
24 #endif
25
26 #include <stdlib.h>
27 #include <string.h>
28 #include <sys/types.h>
29
30 #include "gmime-parser.h"
31
32 #include "gmime-table-private.h"
33 #include "gmime-message-part.h"
34 #include "gmime-parse-utils.h"
35 #include "gmime-stream-mem.h"
36 #include "gmime-multipart.h"
37 #include "gmime-common.h"
38 #include "gmime-part.h"
39
40 #if GLIB_MAJOR_VERSION > 2 || (GLIB_MAJOR_VERSION == 2 && GLIB_MINOR_VERSION >= 14)
41 #define HAVE_GLIB_REGEX
42 #elif defined (HAVE_REGEX_H)
43 #include <regex.h>
44 #endif
45
/* w(x): wraps warning statements; expands to x only when
 * ENABLE_WARNINGS is defined, otherwise to nothing. */
#ifdef ENABLE_WARNINGS
#define w(x) x
#else
#define w(x)
#endif /* ENABLE_WARNINGS */

/* d(x): wraps debug statements; always expands to nothing here. */
#define d(x)
53
54
55 /**
56  * SECTION: gmime-parser
57  * @title: GMimeParser
58  * @short_description: Message and MIME part parser
59  * @see_also:
60  *
61  * A #GMimeParser parses a stream into a #GMimeMessage or other
62  * #GMimeObject and can also handle parsing MBox formatted streams
63  * into multiple #GMimeMessage objects.
64  **/
65
/* One entry of the parser's boundary stack (see parser_push_boundary()
 * and parser_pop_boundary()). */
typedef struct _boundary_stack {
        /* entry that was on top of the stack when this one was pushed */
        struct _boundary_stack *parent;
        /* owned string: "--<boundary>--" for MIME boundaries, or a copy
         * of MBOX_BOUNDARY for the mbox From-line marker */
        char *boundary;
        /* length of the opening form, i.e. without the trailing "--" */
        size_t boundarylen;
        /* length of the closing form, i.e. including the trailing "--" */
        size_t boundarylenfinal;
        /* largest boundarylenfinal of this entry and all of its parents */
        size_t boundarylenmax;
        /* initialised to -1 when pushed; later use is outside this view */
        gint64 content_end;
} BoundaryStack;
74
/* Singly-linked list node holding one parsed header field
 * (built by header_parse(), released by header_raw_clear()). */
typedef struct _header_raw {
        /* next header in parse order, or NULL */
        struct _header_raw *next;
        /* owned strings: field name (text before the ':') and the text
         * after the ':' as returned by g_mime_strdup_trim() */
        char *name, *value;
        /* copy of the parser's header_offset at the time of parsing */
        gint64 offset;
} HeaderRaw;
80
/* Lightweight content-type description handed to the
 * parser_construct_*() functions declared below.
 * NOTE(review): field semantics are set by code outside this view;
 * presumably `exists` records whether a Content-Type header was
 * actually present -- confirm against the code that fills it in. */
typedef struct _content_type {
        char *type, *subtype;
        gboolean exists;
} ContentType;
85
/* defined elsewhere in the library */
extern void _g_mime_object_set_content_type (GMimeObject *object, GMimeContentType *content_type);

/* GObject type hooks registered by g_mime_parser_get_type() */
static void g_mime_parser_class_init (GMimeParserClass *klass);
static void g_mime_parser_init (GMimeParser *parser, GMimeParserClass *klass);
static void g_mime_parser_finalize (GObject *object);

/* (re)initialise / tear down the per-stream parsing state */
static void parser_init (GMimeParser *parser, GMimeStream *stream);
static void parser_close (GMimeParser *parser);

/* part constructors; defined later in the file */
static GMimeObject *parser_construct_leaf_part (GMimeParser *parser, ContentType *content_type,
                                                gboolean toplevel, int *found);
static GMimeObject *parser_construct_multipart (GMimeParser *parser, ContentType *content_type,
                                                gboolean toplevel, int *found);

/* G_TYPE_OBJECT class, set in g_mime_parser_class_init() and used to
 * chain up in g_mime_parser_finalize() */
static GObjectClass *parent_class = NULL;
101
/* size of read buffer (bytes requested from the stream per fill, at most) */
#define SCAN_BUF 4096

/* headroom guaranteed to be before each read buffer; unread data is
 * moved into this area by parser_fill() before reading more */
#define SCAN_HEAD 128

/* conservative growth sizes: initial allocations for the header
 * buffer and the raw header buffer respectively */
#define HEADER_INIT_SIZE 128
#define HEADER_RAW_INIT_SIZE 1024
111
112
/* Values stored in _GMimeParserPrivate.state. */
enum {
        /* set by parser_step_from() when no From-line could be found */
        GMIME_PARSER_STATE_ERROR = -1,
        /* initial state assigned by parser_init() */
        GMIME_PARSER_STATE_INIT,
        GMIME_PARSER_STATE_FROM,
        /* entered by parser_step_from() once a From-line has been consumed */
        GMIME_PARSER_STATE_MESSAGE_HEADERS,
        GMIME_PARSER_STATE_HEADERS,
        GMIME_PARSER_STATE_HEADERS_END,
        GMIME_PARSER_STATE_CONTENT,
        GMIME_PARSER_STATE_COMPLETE,
};
123
/* Private parser state. Allocated in g_mime_parser_init(), freed in
 * g_mime_parser_finalize(); the per-stream portion is set up by
 * parser_init() and released by parser_close(). */
struct _GMimeParserPrivate {
        /* stream being parsed (the parser holds a reference), or NULL */
        GMimeStream *stream;
        
        /* stream position corresponding to `inend`; starts as the value
         * of g_mime_stream_tell() (or -1) and grows by each read */
        gint64 offset;
        
        /* i/o buffers */
        /* SCAN_HEAD bytes of headroom, SCAN_BUF bytes of read buffer and
         * one extra byte used for a sentinel written at *inend */
        char realbuf[SCAN_HEAD + SCAN_BUF + 1];
        /* realbuf + SCAN_HEAD */
        char *inbuf;
        /* next unread byte */
        char *inptr;
        /* one past the last valid byte */
        char *inend;
        
        /* offset and bytes (without the newline) of the last From-line
         * found by parser_step_from(); from_offset is -1 until then */
        gint64 from_offset;
        GByteArray *from_line;
        
        /* callback (and its user data) invoked by header_parse() for
         * header names matching the regex below */
        GMimeParserHeaderRegexFunc header_cb;
        gpointer user_data;
        
#if defined (HAVE_GLIB_REGEX)
        /* compiled pattern, or NULL when none is set */
        GRegex *regex;
#elif defined (HAVE_REGEX_H)
        /* compiled pattern; only valid while have_regex is set */
        regex_t regex;
#endif
        
        /* header buffer */
        /* accumulates the header field currently being read; headerptr
         * is the write position and headerleft the remaining capacity
         * (one byte is kept back for the terminating NUL) */
        char *headerbuf;
        char *headerptr;
        size_t headerleft;
        
        /* raw header buffer */
        /* same layout as the header buffer; rawbuf is NULL when raw
         * headers are not being collected */
        char *rawbuf;
        char *rawptr;
        size_t rawleft;
        
        /* current message headerblock offsets */
        gint64 message_headers_begin;
        gint64 message_headers_end;
        
        /* current mime-part headerblock offsets */
        gint64 headers_begin;
        gint64 headers_end;
        
        /* current header field offset */
        gint64 header_offset;
        
        /* one of the GMIME_PARSER_STATE_* values */
        short int state;
        
        unsigned short int unused:10;
        /* cleared by parser_init(); maintained by code outside this view */
        unsigned short int midline:1;
        /* set by parser_init() when g_mime_stream_tell() did not return -1 */
        unsigned short int seekable:1;
        /* scan for mbox-style From-lines */
        unsigned short int scan_from:1;
        /* regex.h builds: whether `regex` holds a compiled pattern */
        unsigned short int have_regex:1;
        /* hint: leave content in the stream rather than loading it */
        unsigned short int persist_stream:1;
        /* honour Content-Length headers when scanning mbox files */
        unsigned short int respect_content_length:1;
        
        /* list of headers parsed so far (owned) */
        HeaderRaw *headers;
        
        /* top of the boundary stack (owned) */
        BoundaryStack *bounds;
};
182
/* mbox message separator. parser_push_boundary() recognises this
 * marker by pointer identity, so pass this very array, not a copy. */
static const char MBOX_BOUNDARY[6] = "From ";
#define MBOX_BOUNDARY_LEN 5
185
186 static void
187 parser_push_boundary (GMimeParser *parser, const char *boundary)
188 {
189         struct _GMimeParserPrivate *priv = parser->priv;
190         BoundaryStack *s;
191         size_t max;
192         
193         max = priv->bounds ? priv->bounds->boundarylenmax : 0;
194         
195         s = g_slice_new (BoundaryStack);
196         s->parent = priv->bounds;
197         priv->bounds = s;
198         
199         if (boundary == MBOX_BOUNDARY) {
200                 s->boundary = g_strdup (boundary);
201                 s->boundarylen = MBOX_BOUNDARY_LEN;
202                 s->boundarylenfinal = MBOX_BOUNDARY_LEN;
203         } else {
204                 s->boundary = g_strdup_printf ("--%s--", boundary);
205                 s->boundarylen = strlen (boundary) + 2;
206                 s->boundarylenfinal = s->boundarylen + 2;
207         }
208         
209         s->boundarylenmax = MAX (s->boundarylenfinal, max);
210         
211         s->content_end = -1;
212 }
213
214 static void
215 parser_pop_boundary (GMimeParser *parser)
216 {
217         struct _GMimeParserPrivate *priv = parser->priv;
218         BoundaryStack *s;
219         
220         if (!priv->bounds) {
221                 d(g_warning ("boundary stack underflow"));
222                 return;
223         }
224         
225         s = priv->bounds;
226         priv->bounds = priv->bounds->parent;
227         
228         g_free (s->boundary);
229         
230         g_slice_free (BoundaryStack, s);
231 }
232
233 static const char *
234 header_raw_find (HeaderRaw *headers, const char *name, gint64 *offset)
235 {
236         HeaderRaw *header = headers;
237         
238         while (header) {
239                 if (!g_ascii_strcasecmp (header->name, name)) {
240                         if (offset)
241                                 *offset = header->offset;
242                         return header->value;
243                 }
244                 
245                 header = header->next;
246         }
247         
248         return NULL;
249 }
250
251 static void
252 header_raw_clear (HeaderRaw **headers)
253 {
254         HeaderRaw *header, *next;
255         
256         header = *headers;
257         while (header) {
258                 next = header->next;
259                 g_free (header->name);
260                 g_free (header->value);
261                 
262                 g_slice_free (HeaderRaw, header);
263                 
264                 header = next;
265         }
266         
267         *headers = NULL;
268 }
269
270 GType
271 g_mime_parser_get_type (void)
272 {
273         static GType type = 0;
274         
275         if (!type) {
276                 static const GTypeInfo info = {
277                         sizeof (GMimeParserClass),
278                         NULL, /* base_class_init */
279                         NULL, /* base_class_finalize */
280                         (GClassInitFunc) g_mime_parser_class_init,
281                         NULL, /* class_finalize */
282                         NULL, /* class_data */
283                         sizeof (GMimeParser),
284                         0,    /* n_preallocs */
285                         (GInstanceInitFunc) g_mime_parser_init,
286                 };
287                 
288                 type = g_type_register_static (G_TYPE_OBJECT, "GMimeParser", &info, 0);
289         }
290         
291         return type;
292 }
293
294
295 static void
296 g_mime_parser_class_init (GMimeParserClass *klass)
297 {
298         GObjectClass *object_class = G_OBJECT_CLASS (klass);
299         
300         parent_class = g_type_class_ref (G_TYPE_OBJECT);
301         
302         object_class->finalize = g_mime_parser_finalize;
303 }
304
305 static void
306 g_mime_parser_init (GMimeParser *parser, GMimeParserClass *klass)
307 {
308         parser->priv = g_new (struct _GMimeParserPrivate, 1);
309         parser->priv->respect_content_length = FALSE;
310         parser->priv->persist_stream = TRUE;
311         parser->priv->have_regex = FALSE;
312         parser->priv->scan_from = FALSE;
313         
314 #if defined (HAVE_GLIB_REGEX)
315         parser->priv->regex = NULL;
316 #endif
317         
318         parser_init (parser, NULL);
319 }
320
321 static void
322 g_mime_parser_finalize (GObject *object)
323 {
324         GMimeParser *parser = (GMimeParser *) object;
325         
326         parser_close (parser);
327         
328 #if defined (HAVE_GLIB_REGEX)
329         if (parser->priv->regex)
330                 g_regex_unref (parser->priv->regex);
331 #elif defined (HAVE_REGEX_H)
332         if (parser->priv->have_regex)
333                 regfree (&parser->priv->regex);
334 #endif
335         
336         g_free (parser->priv);
337         
338         G_OBJECT_CLASS (parent_class)->finalize (object);
339 }
340
341
342 static void
343 parser_init (GMimeParser *parser, GMimeStream *stream)
344 {
345         struct _GMimeParserPrivate *priv = parser->priv;
346         gint64 offset = -1;
347         
348         if (stream) {
349                 g_object_ref (stream);
350                 offset = g_mime_stream_tell (stream);
351         }
352         
353         priv->state = GMIME_PARSER_STATE_INIT;
354         
355         priv->stream = stream;
356         
357         priv->offset = offset;
358         
359         priv->inbuf = priv->realbuf + SCAN_HEAD;
360         priv->inptr = priv->inbuf;
361         priv->inend = priv->inbuf;
362         
363         priv->from_offset = -1;
364         priv->from_line = g_byte_array_new ();
365         
366         priv->headerbuf = g_malloc (HEADER_INIT_SIZE);
367         priv->headerleft = HEADER_INIT_SIZE - 1;
368         priv->headerptr = priv->headerbuf;
369         
370         if (offset == -1 || !priv->persist_stream) {
371                 priv->rawbuf = g_malloc (HEADER_RAW_INIT_SIZE);
372                 priv->rawleft = HEADER_RAW_INIT_SIZE - 1;
373                 priv->rawptr = priv->rawbuf;
374         } else {
375                 priv->rawbuf = NULL;
376                 priv->rawptr = NULL;
377                 priv->rawleft = 0;
378         }
379         
380         priv->message_headers_begin = -1;
381         priv->message_headers_end = -1;
382         
383         priv->headers_begin = -1;
384         priv->headers_end = -1;
385         
386         priv->header_offset = -1;
387         
388         priv->midline = FALSE;
389         priv->seekable = offset != -1;
390         
391         priv->headers = NULL;
392         
393         priv->bounds = NULL;
394 }
395
396 static void
397 parser_close (GMimeParser *parser)
398 {
399         struct _GMimeParserPrivate *priv = parser->priv;
400         
401         if (priv->stream)
402                 g_object_unref (priv->stream);
403         
404         g_byte_array_free (priv->from_line, TRUE);
405         
406         g_free (priv->headerbuf);
407         g_free (priv->rawbuf);
408         
409         header_raw_clear (&priv->headers);
410         
411         while (priv->bounds)
412                 parser_pop_boundary (parser);
413 }
414
415
416 /**
417  * g_mime_parser_new:
418  *
419  * Creates a new parser object.
420  *
421  * Returns: a new parser object.
422  **/
423 GMimeParser *
424 g_mime_parser_new (void)
425 {
426         return g_object_newv (GMIME_TYPE_PARSER, 0, NULL);
427 }
428
429
430 /**
431  * g_mime_parser_new_with_stream:
432  * @stream: raw message or part stream
433  *
434  * Creates a new parser object preset to parse @stream.
435  *
436  * Returns: a new parser object.
437  **/
438 GMimeParser *
439 g_mime_parser_new_with_stream (GMimeStream *stream)
440 {
441         GMimeParser *parser;
442         
443         parser = g_mime_parser_new ();
444         g_mime_parser_init_with_stream (parser, stream);
445         
446         return parser;
447 }
448
449
450 /**
451  * g_mime_parser_init_with_stream:
452  * @parser: a #GMimeParser context
453  * @stream: raw message or part stream
454  *
455  * Initializes @parser to use @stream.
456  *
457  * WARNING: Initializing a parser with a stream is comparable to
458  * selling your soul (@stream) to the devil (@parser). You are
459  * basically giving the parser complete control of the stream, this
460  * means that you had better not touch the stream so long as the
461  * parser is still using it. This means no reading, writing, seeking,
462  * or resetting of the stream. Anything that will/could change the
463  * current stream's offset is PROHIBITED.
464  *
465  * It is also recommended that you not use g_mime_stream_tell()
466  * because it will not necessarily give you the current @parser offset
467  * since @parser handles its own internal read-ahead buffer. Instead,
468  * it is recommended that you use g_mime_parser_tell() if you have a
469  * reason to need the current offset of the @parser.
470  **/
471 void
472 g_mime_parser_init_with_stream (GMimeParser *parser, GMimeStream *stream)
473 {
474         g_return_if_fail (GMIME_IS_PARSER (parser));
475         g_return_if_fail (GMIME_IS_STREAM (stream));
476         
477         parser_close (parser);
478         parser_init (parser, stream);
479 }
480
481
482 /**
483  * g_mime_parser_get_persist_stream:
484  * @parser: a #GMimeParser context
485  *
486  * Gets whether or not the underlying stream is persistent.
487  *
488  * Returns: %TRUE if the @parser will leave the content on disk or
489  * %FALSE if it will load the content into memory.
490  **/
491 gboolean
492 g_mime_parser_get_persist_stream (GMimeParser *parser)
493 {
494         g_return_val_if_fail (GMIME_IS_PARSER (parser), FALSE);
495         
496         return (parser->priv->persist_stream && parser->priv->seekable);
497 }
498
499
500 /**
501  * g_mime_parser_set_persist_stream:
502  * @parser: a #GMimeParser context
503  * @persist: persist attribute
504  *
505  * Sets whether or not the @parser's underlying stream is persistent.
506  *
507  * If @persist is %TRUE, the @parser will attempt to construct
508  * messages/parts whose content will remain on disk rather than being
509  * loaded into memory so as to reduce memory usage. This is the default.
510  *
511  * If @persist is %FALSE, the @parser will always load message content
512  * into memory.
513  *
514  * Note: This attribute only serves as a hint to the @parser. If the
515  * underlying stream does not support seeking, then this attribute
516  * will be ignored.
517  **/
518 void
519 g_mime_parser_set_persist_stream (GMimeParser *parser, gboolean persist)
520 {
521         struct _GMimeParserPrivate *priv;
522         
523         g_return_if_fail (GMIME_IS_PARSER (parser));
524         
525         priv = parser->priv;
526         
527         if (priv->persist_stream == persist)
528                 return;
529         
530         if (persist) {
531                 priv->persist_stream = TRUE;
532                 
533                 if (priv->seekable && !priv->rawbuf) {
534                         priv->rawbuf = g_malloc (HEADER_RAW_INIT_SIZE);
535                         priv->rawleft = HEADER_RAW_INIT_SIZE - 1;
536                         priv->rawptr = priv->rawbuf;
537                 }
538         } else {
539                 priv->persist_stream = FALSE;
540                 
541                 if (priv->rawbuf) {
542                         g_free (priv->rawbuf);
543                         priv->rawbuf = NULL;
544                         priv->rawptr = NULL;
545                         priv->rawleft = 0;
546                 }
547         }
548 }
549
550
551 /**
552  * g_mime_parser_get_scan_from:
553  * @parser: a #GMimeParser context
554  *
555  * Gets whether or not @parser is set to scan mbox-style From-lines.
556  *
557  * Returns: whether or not @parser is set to scan mbox-style
558  * From-lines.
559  **/
560 gboolean
561 g_mime_parser_get_scan_from (GMimeParser *parser)
562 {
563         g_return_val_if_fail (GMIME_IS_PARSER (parser), FALSE);
564         
565         return parser->priv->scan_from;
566 }
567
568
569 /**
570  * g_mime_parser_set_scan_from:
571  * @parser: a #GMimeParser context
572  * @scan_from: %TRUE to scan From-lines or %FALSE otherwise
573  *
574  * Sets whether or not @parser should scan mbox-style From-lines.
575  **/
576 void
577 g_mime_parser_set_scan_from (GMimeParser *parser, gboolean scan_from)
578 {
579         g_return_if_fail (GMIME_IS_PARSER (parser));
580         
581         parser->priv->scan_from = scan_from ? 1 : 0;
582 }
583
584
585 /**
586  * g_mime_parser_get_respect_content_length:
587  * @parser: a #GMimeParser context
588  *
589  * Gets whether or not @parser is set to use Content-Length for
590  * determining the offset of the end of the message.
591  *
592  * Returns: whether or not @parser is set to use Content-Length for
593  * determining the offset of the end of the message.
594  **/
595 gboolean
596 g_mime_parser_get_respect_content_length (GMimeParser *parser)
597 {
598         g_return_val_if_fail (GMIME_IS_PARSER (parser), FALSE);
599         
600         return parser->priv->respect_content_length;
601 }
602
603
604 /**
605  * g_mime_parser_set_respect_content_length:
606  * @parser: a #GMimeParser context
607  * @respect_content_length: %TRUE if the parser should use Content-Length headers or %FALSE otherwise.
608  *
609  * Sets whether or not @parser should respect Content-Length headers
610  * when deciding where to look for the start of the next message. Only
611  * used when the parser is also set to scan for From-lines.
612  *
613  * Most notably useful when parsing broken Solaris mbox files (See
614  * http://www.jwz.org/doc/content-length.html for details).
615  **/
616 void
617 g_mime_parser_set_respect_content_length (GMimeParser *parser, gboolean respect_content_length)
618 {
619         g_return_if_fail (GMIME_IS_PARSER (parser));
620         
621         parser->priv->respect_content_length = respect_content_length ? 1 : 0;
622 }
623
624
625 /**
626  * g_mime_parser_set_header_regex:
627  * @parser: a #GMimeParser context
628  * @regex: regular expression
629  * @header_cb: callback function
630  * @user_data: user data
631  *
632  * Sets the regular expression pattern @regex on @parser. Whenever a
633  * header matching the pattern @regex is parsed, @header_cb is called
634  * with @user_data as the user_data argument.
635  *
636  * If @regex is %NULL, then the previously registered regex callback
637  * is unregistered and no new callback is set.
638  **/
639 void
640 g_mime_parser_set_header_regex (GMimeParser *parser, const char *regex,
641                                 GMimeParserHeaderRegexFunc header_cb, gpointer user_data)
642 {
643         struct _GMimeParserPrivate *priv;
644         
645         g_return_if_fail (GMIME_IS_PARSER (parser));
646         
647         priv = parser->priv;
648         
649 #if defined (HAVE_GLIB_REGEX)
650         if (priv->regex) {
651                 g_regex_unref (priv->regex);
652                 priv->regex = NULL;
653         }
654 #elif defined (HAVE_REGEX_H)
655         if (priv->have_regex) {
656                 regfree (&priv->regex);
657                 priv->have_regex = FALSE;
658         }
659 #endif
660         
661         if (!regex || !header_cb)
662                 return;
663         
664         priv->header_cb = header_cb;
665         priv->user_data = user_data;
666         
667 #if defined (HAVE_GLIB_REGEX)
668         priv->regex = g_regex_new (regex, G_REGEX_RAW | G_REGEX_EXTENDED | G_REGEX_CASELESS, 0, NULL);
669 #elif defined (HAVE_REGEX_H)
670         priv->have_regex = !regcomp (&priv->regex, regex, REG_EXTENDED | REG_ICASE | REG_NOSUB);
671 #endif
672 }
673
674
/*
 * Makes sure more than @atleast unread bytes are buffered, reading
 * from the stream if necessary.
 *
 * If the buffer already holds more than @atleast unread bytes nothing
 * is read. Otherwise the unread bytes are moved towards the front of
 * realbuf (into the SCAN_HEAD headroom where possible) and a single
 * g_mime_stream_read() is issued to fill the space up to
 * realbuf + SCAN_HEAD + SCAN_BUF.
 *
 * Returns the number of unread bytes now available. A failed or empty
 * read is not reported separately: the count simply does not grow, so
 * callers compare the result with what they already had.
 */
static ssize_t
parser_fill (GMimeParser *parser, size_t atleast)
{
        struct _GMimeParserPrivate *priv = parser->priv;
        char *inbuf, *inptr, *inend;
        ssize_t nread;
        size_t inlen;
        
        inbuf = priv->inbuf;
        inptr = priv->inptr;
        inend = priv->inend;
        inlen = inend - inptr;
        
        g_assert (inptr <= inend);
        
        /* enough data buffered already */
        if (inlen > atleast)
                return inlen;
        
        /* attempt to align 'inend' with realbuf + SCAN_HEAD */
        if (inptr >= inbuf) {
                /* unread data lies in the main buffer: move it so that it
                 * starts at most SCAN_HEAD bytes before inbuf; afterwards the
                 * local 'inbuf' marks the end of the moved data */
                inbuf -= inlen < SCAN_HEAD ? inlen : SCAN_HEAD;
                memmove (inbuf, inptr, inlen);
                inptr = inbuf;
                inbuf += inlen;
        } else if (inptr > priv->realbuf) {
                /* unread data already starts in the headroom: slide it back
                 * as far as the start of realbuf allows, but no further than
                 * needed to bring its end back to inbuf */
                size_t shift;
                
                shift = MIN (inptr - priv->realbuf, inend - inbuf);
                memmove (inptr - shift, inptr, inlen);
                inptr -= shift;
                inbuf = inptr + inlen;
        } else {
                /* we can't shift... */
                inbuf = inend;
        }
        
        /* from here on the local 'inbuf' is where new data gets appended
         * and the local 'inend' is the hard end of the read area */
        priv->inptr = inptr;
        priv->inend = inbuf;
        inend = priv->realbuf + SCAN_HEAD + SCAN_BUF;
        
        /* NOTE(review): offset is advanced even when it is -1 (position
         * unknown), after which parser_offset() no longer sees -1 --
         * confirm whether that is intended before changing it */
        if ((nread = g_mime_stream_read (priv->stream, inbuf, inend - inbuf)) > 0) {
                priv->offset += nread;
                priv->inend += nread;
        }
        
        return (ssize_t) (priv->inend - priv->inptr);
}
722
723
724 static gint64
725 parser_offset (struct _GMimeParserPrivate *priv, const char *inptr)
726 {
727         if (priv->offset == -1)
728                 return -1;
729         
730         if (!inptr)
731                 inptr = priv->inptr;
732         
733         return (priv->offset - (priv->inend - inptr));
734 }
735
736
737 /**
738  * g_mime_parser_tell:
739  * @parser: a #GMimeParser context
740  *
741  * Gets the current stream offset from the parser's internal stream.
742  *
743  * Returns: the current stream offset from the parser's internal stream
744  * or %-1 on error.
745  **/
746 gint64
747 g_mime_parser_tell (GMimeParser *parser)
748 {
749         g_return_val_if_fail (GMIME_IS_PARSER (parser), -1);
750         g_return_val_if_fail (GMIME_IS_STREAM (parser->priv->stream), -1);
751         
752         return parser_offset (parser->priv, NULL);
753 }
754
755
756 /**
757  * g_mime_parser_eos:
758  * @parser: a #GMimeParser context
759  *
760  * Tests the end-of-stream indicator for @parser's internal stream.
761  *
762  * Returns: %TRUE on EOS or %FALSE otherwise.
763  **/
764 gboolean
765 g_mime_parser_eos (GMimeParser *parser)
766 {
767         struct _GMimeParserPrivate *priv;
768         
769         g_return_val_if_fail (GMIME_IS_STREAM (parser->priv->stream), TRUE);
770         
771         priv = parser->priv;
772         return g_mime_stream_eos (priv->stream) && priv->inptr == priv->inend;
773 }
774
/*
 * Scans forward, line by line, for the next line that starts with
 * "From " (the mbox message separator).
 *
 * On success the line (without its newline) is stored in
 * priv->from_line, its offset in priv->from_offset, the read position
 * is left just past the newline, the state becomes
 * GMIME_PARSER_STATE_MESSAGE_HEADERS and 0 is returned.
 *
 * If the input runs out first, the state becomes
 * GMIME_PARSER_STATE_ERROR, all buffered input is discarded and -1 is
 * returned.
 */
static int
parser_step_from (GMimeParser *parser)
{
        struct _GMimeParserPrivate *priv = parser->priv;
        register char *inptr;
        char *start, *inend;
        ssize_t left = 0;
        size_t len;
        
        /* forget any previously recorded From-line */
        g_byte_array_set_size (priv->from_line, 0);
        
        inptr = priv->inptr;
        
        g_assert (inptr <= priv->inend);
        
        do {
        refill:
                /* 'left' is the size of the partial line carried over from the
                 * previous pass; if filling yields no more than that, there is
                 * nothing further to read */
                if (parser_fill (parser, MAX (SCAN_HEAD, left)) <= left) {
                        /* failed to find a From line; EOF reached */
                        priv->state = GMIME_PARSER_STATE_ERROR;
                        priv->inptr = priv->inend;
                        return -1;
                }
                
                inptr = priv->inptr;
                inend = priv->inend;
                /* sentinel so the newline search below always terminates;
                 * realbuf reserves one extra byte for it */
                *inend = '\n';
                
                while (inptr < inend) {
                        start = inptr;
                        while (*inptr != '\n')
                                inptr++;
                        
                        /* the newline is the sentinel or the very last buffered
                         * byte: restart from 'start' once more data is in */
                        if (inptr + 1 >= inend) {
                                /* we don't have enough data; if we can't get more we have to bail */
                                left = (ssize_t) (inend - start);
                                priv->inptr = start;
                                goto refill;
                        }
                        
                        len = (size_t) (inptr - start);
                        inptr++;
                        
                        if (len >= 5 && !strncmp (start, "From ", 5)) {
                                priv->from_offset = parser_offset (priv, start);
                                g_byte_array_append (priv->from_line, (unsigned char *) start, len);
                                goto got_from;
                        }
                }
                
                /* every buffered line was examined; consume them all */
                priv->inptr = inptr;
                left = 0;
        } while (1);
        
 got_from:
        
        priv->state = GMIME_PARSER_STATE_MESSAGE_HEADERS;
        
        priv->inptr = inptr;
        
        return 0;
}
837
#ifdef ALLOC_NEAREST_POW2
/*
 * Rounds @num up to the nearest power of two. 0 is returned for 0 and
 * a power of two is returned unchanged.
 */
static inline size_t
nearest_pow (size_t num)
{
        size_t n;
        
        if (num == 0)
                return 0;
        
        n = num - 1;
#if defined (__GNUC__) && defined (__i386__)
        /* index of the highest set bit of n, or -1 if n is zero */
        __asm__("bsrl %1,%0\n\t"
                "jnz 1f\n\t"
                "movl $-1,%0\n"
                "1:" : "=r" (n) : "rm" (n));
        n = (1 << (n + 1));
#else
        /* smear the highest set bit into every lower position */
        n |= n >> 1;
        n |= n >> 2;
        n |= n >> 4;
        n |= n >> 8;
        n |= n >> 16;
        /* covers the upper half of a 64-bit size_t; written as two
         * 16-bit shifts so that it is a well-defined no-op when size_t
         * is only 32 bits wide */
        n |= (n >> 16) >> 16;
        n++;
#endif
        
        return n;
}

#define next_alloc_size(n) nearest_pow (n)
#else
/*
 * Rounds @n up to the next multiple of 64 (a multiple of 64,
 * including 0, is returned unchanged).
 */
static inline size_t
next_alloc_size (size_t n)
{
        return (n + 63) & ~(size_t) 63;
}
#endif
874
/*
 * Appends @len bytes starting at @start to the header buffer of
 * @priv, growing the buffer first if fewer than @len + 1 bytes are
 * free (one byte always stays available for a terminating NUL).
 * @len is evaluated exactly once.
 */
#define header_append(priv, start, len) G_STMT_START {                    \
        size_t _ha_len = (size_t) (len);                                  \
                                                                          \
        if ((priv)->headerleft <= _ha_len) {                              \
                size_t _ha_size, _ha_off;                                 \
                                                                          \
                _ha_off = (priv)->headerptr - (priv)->headerbuf;          \
                _ha_size = next_alloc_size (_ha_off + _ha_len + 1);       \
                                                                          \
                (priv)->headerbuf = g_realloc ((priv)->headerbuf, _ha_size); \
                (priv)->headerptr = (priv)->headerbuf + _ha_off;          \
                (priv)->headerleft = (_ha_size - 1) - _ha_off;            \
        }                                                                 \
                                                                          \
        memcpy ((priv)->headerptr, (start), _ha_len);                     \
        (priv)->headerptr += _ha_len;                                     \
        (priv)->headerleft -= _ha_len;                                    \
} G_STMT_END
891
/*
 * Appends @len bytes starting at @start to the raw header buffer of
 * @priv, growing it as needed. Does nothing when no raw header buffer
 * is allocated (rawbuf is NULL). @len is evaluated at most once.
 */
#define raw_header_append(priv, start, len) G_STMT_START {                \
        if ((priv)->rawbuf) {                                             \
                size_t _ra_len = (size_t) (len);                          \
                                                                          \
                if ((priv)->rawleft <= _ra_len) {                         \
                        size_t _ra_size, _ra_off;                         \
                                                                          \
                        _ra_off = (priv)->rawptr - (priv)->rawbuf;        \
                        _ra_size = next_alloc_size (_ra_off + _ra_len + 1); \
                                                                          \
                        (priv)->rawbuf = g_realloc ((priv)->rawbuf, _ra_size); \
                        (priv)->rawptr = (priv)->rawbuf + _ra_off;        \
                        (priv)->rawleft = (_ra_size - 1) - _ra_off;       \
                }                                                         \
                                                                          \
                memcpy ((priv)->rawptr, (start), _ra_len);                \
                (priv)->rawptr += _ra_len;                                \
                (priv)->rawleft -= _ra_len;                               \
        }                                                                 \
} G_STMT_END
910
/*
 * Empties the raw header buffer of @priv without releasing it: the
 * write position returns to the start and the bytes written so far
 * are counted as free again. Does nothing when rawbuf is NULL.
 */
#define raw_header_reset(priv) G_STMT_START {                             \
        if ((priv)->rawbuf) {                                             \
                (priv)->rawleft += (priv)->rawptr - (priv)->rawbuf;       \
                (priv)->rawptr = (priv)->rawbuf;                          \
        }                                                                 \
} G_STMT_END
917
918 static void
919 header_parse (GMimeParser *parser, HeaderRaw **tail)
920 {
921         struct _GMimeParserPrivate *priv = parser->priv;
922         register char *inptr;
923         HeaderRaw *header;
924         
925         *priv->headerptr = '\0';
926         inptr = priv->headerbuf;
927         while (*inptr && *inptr != ':' && !is_type (*inptr, IS_SPACE | IS_CTRL))
928                 inptr++;
929         
930         if (*inptr != ':') {
931                 /* ignore invalid headers */
932                 w(g_warning ("Invalid header at %lld: '%s'",
933                              (long long) priv->header_offset,
934                              priv->headerbuf));
935                 
936                 priv->headerleft += priv->headerptr - priv->headerbuf;
937                 priv->headerptr = priv->headerbuf;
938                 
939                 return;
940         }
941         
942         header = g_slice_new (HeaderRaw);
943         header->next = NULL;
944         
945         header->name = g_strndup (priv->headerbuf, (size_t) (inptr - priv->headerbuf));
946         header->value = g_mime_strdup_trim (inptr + 1);
947         
948         header->offset = priv->header_offset;
949         
950         (*tail)->next = header;
951         *tail = header;
952         
953         priv->headerleft += priv->headerptr - priv->headerbuf;
954         priv->headerptr = priv->headerbuf;
955         
956 #if defined (HAVE_GLIB_REGEX)
957         if (priv->regex && g_regex_match (priv->regex, header->name, 0, NULL))
958                 priv->header_cb (parser, header->name, header->value,
959                                  header->offset, priv->user_data);
960 #elif defined (HAVE_REGEX_H)
961         if (priv->have_regex &&
962             !regexec (&priv->header_regex, header->name, 0, NULL, 0))
963                 priv->header_cb (parser, header->name, header->value,
964                                  header->offset, priv->user_data);
965 #endif
966 }
967
/* Bit flags, one per well-known message header, so several can be
 * OR-ed together into a single "seen" mask. */
enum {
        SUBJECT = 0x01,
        FROM    = 0x02,
        DATE    = 0x04,
        TO      = 0x08,
        CC      = 0x10
};
975
976 static gboolean
977 has_message_headers (HeaderRaw *headers)
978 {
979         unsigned int found = 0;
980         HeaderRaw *header;
981         
982         header = headers;
983         while (header != NULL) {
984                 if (!g_ascii_strcasecmp (header->name, "Subject"))
985                         found |= SUBJECT;
986                 else if (!g_ascii_strcasecmp (header->name, "From"))
987                         found |= FROM;
988                 else if (!g_ascii_strcasecmp (header->name, "Date"))
989                         found |= DATE;
990                 else if (!g_ascii_strcasecmp (header->name, "To"))
991                         found |= TO;
992                 else if (!g_ascii_strcasecmp (header->name, "Cc"))
993                         found |= CC;
994                 
995                 header = header->next;
996         }
997         
998         return found != 0;
999 }
1000
1001 static gboolean
1002 has_content_headers (HeaderRaw *headers)
1003 {
1004         HeaderRaw *header;
1005         
1006         header = headers;
1007         while (header != NULL) {
1008                 if (!g_ascii_strcasecmp (header->name, "Content-Type"))
1009                         return TRUE;
1010                 
1011                 header = header->next;
1012         }
1013         
1014         return FALSE;
1015 }
1016
/* Scan one block of headers starting at priv->inptr.
 *
 * Lines are accumulated into the header buffer (header_append) and the raw
 * header buffer (raw_header_append).  Whenever a new line begins that is
 * not a folded continuation (does not start with ' ' or '\t'), the header
 * accumulated so far is handed to header_parse(), which appends it to
 * priv->headers.
 *
 * The function finishes in one of these ways:
 *  - an empty line is reached, or no more input can be read: state becomes
 *    GMIME_PARSER_STATE_HEADERS_END, returns 0;
 *  - priv->scan_from is set and an invalid field name turns out to be a
 *    "From " line: state becomes GMIME_PARSER_STATE_COMPLETE, priv->inptr
 *    is left at the start of that line, returns 0;
 *  - an invalid field name is met after headers that already look complete
 *    (has_message_headers() / has_content_headers()): state becomes
 *    GMIME_PARSER_STATE_CONTENT, priv->inptr is left at the start of that
 *    line, returns 0;
 *  - in GMIME_PARSER_STATE_MESSAGE_HEADERS, with no header parsed yet, an
 *    invalid field name that is not "From ": state becomes
 *    GMIME_PARSER_STATE_ERROR, returns -1.
 *
 * priv->headers_begin / priv->headers_end are set to the stream offsets
 * delimiting the header block. */
static int
parser_step_headers (GMimeParser *parser)
{
        struct _GMimeParserPrivate *priv = parser->priv;
        gboolean eoln, valid = TRUE, fieldname = TRUE;
        gboolean continuation = FALSE;
        register char *inptr;
        char *start, *inend;
        ssize_t left = 0;
        HeaderRaw *tail;
        size_t len;
        
        /* start from a clean slate: no partial line, empty raw buffer,
         * empty header list */
        priv->midline = FALSE;
        raw_header_reset (priv);
        header_raw_clear (&priv->headers);
        /* NOTE(review): treats the list-head pointer as a node so that
         * header_parse() can write (*tail)->next; this presumably relies on
         * `next` being the first member of HeaderRaw -- confirm. */
        tail = (HeaderRaw *) &priv->headers;
        priv->headers_begin = parser_offset (priv, NULL);
        priv->header_offset = priv->headers_begin;
        
        inptr = priv->inptr;
        inend = priv->inend;
        
        do {
        refill:
                /* `left` is the number of unconsumed bytes we already had;
                 * if filling does not give us more than that, stop scanning
                 * (presumably end of input -- depends on parser_fill). */
                if (parser_fill (parser, MAX (SCAN_HEAD, left)) <= left)
                        break;
                
                inptr = priv->inptr;
                inend = priv->inend;
                /* Note: see optimization comment [1] */
                *inend = '\n';
                
                g_assert (inptr <= inend);
                
                while (inptr < inend) {
                        start = inptr;
                        
                        /* if we are scanning a new line, check for a folded header */
                        if (!priv->midline && continuation && (*inptr != ' ' && *inptr != '\t')) {
                                /* not a continuation line: the previous header is
                                 * complete, parse it and begin a new one here */
                                header_parse (parser, &tail);
                                priv->header_offset = parser_offset (priv, inptr);
                                continuation = FALSE;
                                fieldname = TRUE;
                                valid = TRUE;
                        }
                        
                        eoln = inptr[0] == '\n' || (inptr[0] == '\r' && inptr[1] == '\n');
                        if (fieldname && !eoln) {
                                /* scan and validate the field name */
                                if (*inptr != ':') {
                                        /* Note: see optimization comment [1] */
                                        /* temporarily use ':' as the sentinel so the
                                         * loop below needs no bounds check */
                                        *inend = ':';
                                        
                                        while (*inptr != ':') {
                                                if (is_type (*inptr, IS_SPACE | IS_CTRL)) {
                                                        valid = FALSE;
                                                        break;
                                                }
                                                
                                                inptr++;
                                        }
                                        
                                        if (inptr == inend) {
                                                /* don't have the full field name */
                                                left = (ssize_t) (inend - start);
                                                priv->inptr = start;
                                                goto refill;
                                        }
                                        
                                        /* Note: see optimization comment [1] */
                                        *inend = '\n';
                                } else {
                                        /* a line starting with ':' has an empty field name */
                                        valid = FALSE;
                                }
                                
                                if (!valid) {
                                        /* the scan stopped after exactly "From" and the
                                         * line begins with "From ": an mbox From-line */
                                        if (priv->scan_from && (inptr - start) == 4
                                            && !strncmp (start, "From ", 5))
                                                goto next_message;
                                        
                                        if (priv->headers != NULL) {
                                                if (priv->state == GMIME_PARSER_STATE_MESSAGE_HEADERS) {
                                                        if (has_message_headers (priv->headers)) {
                                                                /* probably the start of the content,
                                                                 * a broken mailer didn't terminate the
                                                                 * headers with an empty line. *sigh* */
                                                                goto content_start;
                                                        }
                                                } else if (has_content_headers (priv->headers)) {
                                                        /* probably the start of the content,
                                                         * a broken mailer didn't terminate the
                                                         * headers with an empty line. *sigh* */
                                                        goto content_start;
                                                }
                                        } else if (priv->state == GMIME_PARSER_STATE_MESSAGE_HEADERS) {
                                                /* Be a little more strict when scanning toplevel message
                                                 * headers, but remain lenient with From-lines. */
                                                if ((inptr - start) != 4 || strncmp (start, "From ", 5) != 0) {
                                                        priv->state = GMIME_PARSER_STATE_ERROR;
                                                        return -1;
                                                }
                                        }
                                        /* otherwise fall through: the invalid line is
                                         * accumulated like any other and header_parse()
                                         * ignores it later */
                                }
                        }
                        
                        fieldname = FALSE;
                        
                        /* Note: see optimization comment [1] */
                        while (*inptr != '\n')
                                inptr++;
                        
                        len = (size_t) (inptr - start);
                        
                        if (inptr == inend) {
                                /* we don't have the full line, save
                                 * what we have and refill our
                                 * buffer... */
                                /* hold back a trailing '\r': it may be the first
                                 * half of a CRLF split across the refill */
                                if (inptr > start && inptr[-1] == '\r') {
                                        inptr--;
                                        len--;
                                }
                                
                                raw_header_append (priv, start, len);
                                header_append (priv, start, len);
                                left = (ssize_t) (inend - inptr);
                                priv->midline = TRUE;
                                priv->inptr = inptr;
                                goto refill;
                        }
                        
                        /* the raw copy keeps any '\r'; the parsed copy below does not */
                        raw_header_append (priv, start, len);
                        
                        if (inptr > start && inptr[-1] == '\r')
                                len--;
                        
                        /* check to see if we've reached the end of the headers */
                        if (!priv->midline && len == 0)
                                goto headers_end;
                        
                        header_append (priv, start, len);
                        
                        /* inptr has to be less than inend - 1 */
                        raw_header_append (priv, "\n", 1);
                        priv->midline = FALSE;
                        continuation = TRUE;
                        inptr++;
                }
                
                left = (ssize_t) (inend - inptr);
                priv->inptr = inptr;
        } while (1);
        
        /* out of input: whatever is still buffered is treated as the tail
         * of the last header */
        inptr = priv->inptr;
        inend = priv->inend;
        start = inptr;
        
        len = (size_t) (inend - inptr);
        header_append (priv, inptr, len);
        raw_header_append (priv, inptr, len);
        
 headers_end:
        
        /* flush the header still sitting in the header buffer, if any */
        if (priv->headerptr > priv->headerbuf)
                header_parse (parser, &tail);
        
        priv->headers_end = parser_offset (priv, start);
        priv->state = GMIME_PARSER_STATE_HEADERS_END;
        if (priv->rawbuf)
                *priv->rawptr = '\0';
        priv->inptr = inptr;
        
        return 0;
        
 next_message:
        
        /* rewind to the start of the "From " line so it is not consumed here */
        priv->headers_end = parser_offset (priv, start);
        priv->state = GMIME_PARSER_STATE_COMPLETE;
        if (priv->rawbuf)
                *priv->rawptr = '\0';
        priv->inptr = start;
        
        return 0;
        
 content_start:
        
        /* rewind to the start of the offending line: it is content, not a header */
        priv->headers_end = parser_offset (priv, start);
        priv->state = GMIME_PARSER_STATE_CONTENT;
        if (priv->rawbuf)
                *priv->rawptr = '\0';
        priv->inptr = start;
        
        return 0;
}
1210
1211 static void
1212 content_type_destroy (ContentType *content_type)
1213 {
1214         g_free (content_type->subtype);
1215         g_free (content_type->type);
1216         
1217         g_slice_free (ContentType, content_type);
1218 }
1219
1220 static gboolean
1221 content_type_is_type (ContentType *content_type, const char *type, const char *subtype)
1222 {
1223         if (!strcmp (type, "*") || !g_ascii_strcasecmp (content_type->type, type)) {
1224                 if (!strcmp (subtype, "*")) {
1225                         /* special case */
1226                         return TRUE;
1227                 }
1228                 
1229                 if (!g_ascii_strcasecmp (content_type->subtype, subtype))
1230                         return TRUE;
1231         }
1232         
1233         return FALSE;
1234 }
1235
1236 static ContentType *
1237 parser_content_type (GMimeParser *parser)
1238 {
1239         struct _GMimeParserPrivate *priv = parser->priv;
1240         ContentType *content_type;
1241         const char *value;
1242         
1243         content_type = g_slice_new (ContentType);
1244         
1245         if (!(value = header_raw_find (priv->headers, "Content-Type", NULL)) ||
1246             !g_mime_parse_content_type (&value, &content_type->type, &content_type->subtype)) {
1247                 content_type->type = g_strdup ("text");
1248                 content_type->subtype = g_strdup ("plain");
1249         }
1250         
1251         content_type->exists = value != NULL;
1252         
1253         return content_type;
1254 }
1255
1256 static int
1257 parser_skip_line (GMimeParser *parser)
1258 {
1259         struct _GMimeParserPrivate *priv = parser->priv;
1260         register char *inptr;
1261         char *inend;
1262         int rv = 0;
1263         
1264         do {
1265                 inptr = priv->inptr;
1266                 inend = priv->inend;
1267                 *inend = '\n';
1268                 
1269                 while (*inptr != '\n')
1270                         inptr++;
1271                 
1272                 if (inptr < inend)
1273                         break;
1274                 
1275                 priv->inptr = inptr;
1276                 
1277                 if (parser_fill (parser, SCAN_HEAD) <= 0) {
1278                         inptr = priv->inptr;
1279                         rv = -1;
1280                         break;
1281                 }
1282         } while (1);
1283         
1284         priv->midline = FALSE;
1285         
1286         priv->inptr = MIN (inptr + 1, priv->inend);
1287         
1288         return rv;
1289 }
1290
1291 static int
1292 parser_step (GMimeParser *parser)
1293 {
1294         struct _GMimeParserPrivate *priv = parser->priv;
1295         
1296         switch (priv->state) {
1297         case GMIME_PARSER_STATE_ERROR:
1298                 break;
1299         case GMIME_PARSER_STATE_INIT:
1300                 priv->message_headers_begin = -1;
1301                 priv->message_headers_end = -1;
1302                 if (priv->scan_from)
1303                         priv->state = GMIME_PARSER_STATE_FROM;
1304                 else
1305                         priv->state = GMIME_PARSER_STATE_MESSAGE_HEADERS;
1306                 break;
1307         case GMIME_PARSER_STATE_FROM:
1308                 priv->message_headers_begin = -1;
1309                 priv->message_headers_end = -1;
1310                 parser_step_from (parser);
1311                 break;
1312         case GMIME_PARSER_STATE_MESSAGE_HEADERS:
1313         case GMIME_PARSER_STATE_HEADERS:
1314                 parser_step_headers (parser);
1315                 
1316                 if (priv->message_headers_begin == -1) {
1317                         priv->message_headers_begin = priv->headers_begin;
1318                         priv->message_headers_end = priv->headers_end;
1319                 }
1320                 break;
1321         case GMIME_PARSER_STATE_HEADERS_END:
1322                 if (parser_skip_line (parser) == -1)
1323                         priv->state = GMIME_PARSER_STATE_ERROR;
1324                 else
1325                         priv->state = GMIME_PARSER_STATE_CONTENT;
1326                 break;
1327         case GMIME_PARSER_STATE_CONTENT:
1328                 break;
1329         case GMIME_PARSER_STATE_COMPLETE:
1330                 break;
1331         default:
1332                 g_assert_not_reached ();
1333                 break;
1334         }
1335         
1336         return priv->state;
1337 }
1338
1339
/* Result codes of the content/boundary scanners.  FOUND_NOTHING must stay
 * zero: callers test the result of check_boundary() for truth. */
enum {
        FOUND_NOTHING      = 0,  /* line is not a boundary */
        FOUND_EOS          = 1,  /* ran out of input */
        FOUND_BOUNDARY     = 2,  /* matched a part boundary */
        FOUND_END_BOUNDARY = 3   /* matched a terminating boundary */
};
1346
/* Append `len` bytes starting at `start` to the byte array `content`,
 * or do nothing when `content` is NULL (the caller is not collecting
 * the content in memory).
 *
 * All parameters are parenthesized so that arbitrary expressions may be
 * passed; `content` is evaluated twice, so it must be side-effect free. */
#define content_save(content, start, len) G_STMT_START {                     \
        if ((content) != NULL)                                               \
                g_byte_array_append ((content), (unsigned char *) (start),   \
                                     (len));                                 \
} G_STMT_END
1351
/* Cheap pre-filter: non-zero when a line of `len` bytes at `start` could
 * be a boundary -- either an mbox "From " line (only when `scan_from` is
 * set) or a line beginning with "--".
 *
 * Parameters are parenthesized so that expressions such as `p + 1` or a
 * conditional length behave as expected; `start` and `len` are evaluated
 * more than once, so they must be side-effect free. */
#define possible_boundary(scan_from, start, len)                                            \
                         (((scan_from) && (len) >= 5 && !strncmp ((start), "From ", 5)) ||  \
                          ((len) >= 2 && ((start)[0] == '-' && (start)[1] == '-')))
1355
1356 static int
1357 check_boundary (struct _GMimeParserPrivate *priv, const char *start, size_t len)
1358 {
1359         gint64 offset = parser_offset (priv, start);
1360         
1361         if (len > 0 && start[len - 1] == '\r')
1362                 len--;
1363         
1364         if (possible_boundary (priv->scan_from, start, len)) {
1365                 BoundaryStack *s;
1366                 
1367                 d(printf ("checking boundary '%.*s'\n", len, start));
1368                 
1369                 s = priv->bounds;
1370                 while (s) {
1371                         /* we use >= here because From lines are > 5 chars */
1372                         if (offset >= s->content_end &&
1373                             len >= s->boundarylenfinal &&
1374                             !strncmp (s->boundary, start,
1375                                       s->boundarylenfinal)) {
1376                                 d(printf ("found %s\n", s->content_end != -1 && offset >= s->content_end ?
1377                                           "end of content" : "end boundary"));
1378                                 return FOUND_END_BOUNDARY;
1379                         }
1380                         
1381                         if (len == s->boundarylen &&
1382                             !strncmp (s->boundary, start,
1383                                       s->boundarylen)) {
1384                                 d(printf ("found boundary\n"));
1385                                 return FOUND_BOUNDARY;
1386                         }
1387                         
1388                         s = s->parent;
1389                 }
1390                 
1391                 d(printf ("'%.*s' not a boundary\n", len, start));
1392         }
1393         
1394         return FOUND_NOTHING;
1395 }
1396
1397 static gboolean
1398 found_immediate_boundary (struct _GMimeParserPrivate *priv, gboolean end)
1399 {
1400         BoundaryStack *s = priv->bounds;
1401         size_t len = end ? s->boundarylenfinal : s->boundarylen;
1402         
1403         return !strncmp (priv->inptr, s->boundary, len)
1404                 && (priv->inptr[len] == '\n' || priv->inptr[len] == '\r');
1405 }
1406
1407 /* Optimization Notes:
1408  *
1409  * 1. By making the priv->realbuf char array 1 extra char longer, we
1410  * can safely set '*inend' to '\n' and not fear an ABW. Setting *inend
1411  * to '\n' means that we can eliminate having to check that inptr <
1412  * inend every trip through our inner while-loop. This cuts the number
1413  * of instructions down from ~7 to ~4, assuming the compiler does its
1414  * job correctly ;-)
1415  **/
1416
1417
/* Longest boundary line we may need buffered for the given boundary
 * stack entry, or 0 when there is none.  We add 2 for \r\n.
 * The parameter is parenthesized so any pointer expression may be passed;
 * it is evaluated twice, so it must be side-effect free. */
#define MAX_BOUNDARY_LEN(bounds) ((bounds) ? (bounds)->boundarylenmax + 2 : 0)
1420
/* Scan content line by line until a boundary or the end of input.
 *
 * Every line that is not a boundary is passed to content_save(), which
 * appends it (including its '\n') to @content when @content is non-NULL.
 *
 * On return priv->inptr points at the start of the line that stopped the
 * scan, so the boundary itself is not consumed.  *crlf is set to 0 when
 * the result is FOUND_EOS; otherwise to 2 if the byte before the scan
 * position is '\r' and to 1 if not.  Callers subtract it from the content
 * length because the line ending before a boundary belongs to the boundary.
 *
 * Returns FOUND_EOS, FOUND_BOUNDARY or FOUND_END_BOUNDARY. */
static int
parser_scan_content (GMimeParser *parser, GByteArray *content, guint *crlf)
{
        struct _GMimeParserPrivate *priv = parser->priv;
        register char *inptr;
        char *start, *inend;
        size_t nleft, len;
        size_t atleast;
        int found = 0;
        
        d(printf ("scan-content\n"));
        
        priv->midline = FALSE;
        
        g_assert (priv->inptr <= priv->inend);
        
        start = inptr = priv->inptr;
        
        /* figure out minimum amount of data we need */
        atleast = MAX (SCAN_HEAD, MAX_BOUNDARY_LEN (priv->bounds));
        
        do {
        refill:
                /* remember how much unconsumed data we had before filling, so
                 * that we can tell below whether the fill added anything */
                nleft = priv->inend - inptr;
                if (parser_fill (parser, atleast) <= 0) {
                        start = priv->inptr;
                        found = FOUND_EOS;
                        break;
                }
                
                inptr = priv->inptr;
                inend = priv->inend;
                /* Note: see optimization comment [1] */
                *inend = '\n';
                
                len = (size_t) (inend - inptr);
                /* we were in the middle of a line and the fill produced no new
                 * data: treat the partial line as the last line of the input */
                if (priv->midline && len == nleft)
                        found = FOUND_EOS;
                
                priv->midline = FALSE;
                
                while (inptr < inend) {
                        start = inptr;
                        /* Note: see optimization comment [1] */
                        while (*inptr != '\n')
                                inptr++;
                        
                        len = (size_t) (inptr - start);
                        
                        if (inptr < inend) {
                                /* complete line: a non-zero result is a boundary */
                                if ((found = check_boundary (priv, start, len)))
                                        goto boundary;
                                
                                /* include the '\n' in the saved content */
                                inptr++;
                                len++;
                        } else {
                                /* didn't find an end-of-line */
                                priv->midline = TRUE;
                                
                                if (!found) {
                                        /* not enough to tell if we found a boundary */
                                        priv->inptr = start;
                                        inptr = start;
                                        goto refill;
                                }
                                
                                /* check for a boundary not ending in a \n (EOF) */
                                if ((found = check_boundary (priv, start, len)))
                                        goto boundary;
                                /* NOTE(review): `found` is now 0 again here, so
                                 * the outer loop will attempt another fill */
                        }
                        
                        content_save (content, start, len);
                }
                
                priv->inptr = inptr;
        } while (!found);
        
 boundary:
        
        /* don't chew up the boundary */
        priv->inptr = start;
        
        if (found != FOUND_EOS) {
                /* guess the line-ending style from the byte before inptr */
                if (inptr[-1] == '\r')
                        *crlf = 2;
                else
                        *crlf = 1;
        } else {
                *crlf = 0;
        }
        
        return found;
}
1514
1515 static void
1516 parser_scan_mime_part_content (GMimeParser *parser, GMimePart *mime_part, int *found)
1517 {
1518         struct _GMimeParserPrivate *priv = parser->priv;
1519         GMimeContentEncoding encoding;
1520         GByteArray *content = NULL;
1521         GMimeDataWrapper *wrapper;
1522         GMimeStream *stream;
1523         gint64 start, end;
1524         guint crlf;
1525         
1526         g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
1527         
1528         if (priv->persist_stream && priv->seekable)
1529                 start = parser_offset (priv, NULL);
1530         else
1531                 content = g_byte_array_new ();
1532         
1533         *found = parser_scan_content (parser, content, &crlf);
1534         if (*found != FOUND_EOS) {
1535                 /* last '\n' belongs to the boundary */
1536                 if (priv->persist_stream && priv->seekable)
1537                         end = parser_offset (priv, NULL) - crlf;
1538                 else if (content->len > crlf)
1539                         g_byte_array_set_size (content, content->len - crlf);
1540                 else
1541                         g_byte_array_set_size (content, 0);
1542         } else if (priv->persist_stream && priv->seekable) {
1543                 end = parser_offset (priv, NULL);
1544         }
1545         
1546         encoding = g_mime_part_get_content_encoding (mime_part);
1547         
1548         if (priv->persist_stream && priv->seekable)
1549                 stream = g_mime_stream_substream (priv->stream, start, end);
1550         else
1551                 stream = g_mime_stream_mem_new_with_byte_array (content);
1552         
1553         wrapper = g_mime_data_wrapper_new_with_stream (stream, encoding);
1554         g_mime_part_set_content_object (mime_part, wrapper);
1555         g_object_unref (wrapper);
1556         g_object_unref (stream);
1557 }
1558
1559 static void
1560 parser_scan_message_part (GMimeParser *parser, GMimeMessagePart *mpart, int *found)
1561 {
1562         struct _GMimeParserPrivate *priv = parser->priv;
1563         ContentType *content_type;
1564         GMimeMessage *message;
1565         GMimeObject *object;
1566         GMimeStream *stream;
1567         HeaderRaw *header;
1568         
1569         g_assert (priv->state == GMIME_PARSER_STATE_CONTENT);
1570         
1571         if (priv->bounds != NULL) {
1572                 /* Check for the possibility of an empty message/rfc822 part. */
1573                 register char *inptr;
1574                 size_t atleast;
1575                 char *inend;
1576                 
1577                 /* figure out minimum amount of data we need */
1578                 atleast = MAX (SCAN_HEAD, MAX_BOUNDARY_LEN (priv->bounds));
1579                 
1580                 if (parser_fill (parser, atleast) <= 0) {
1581                         *found = FOUND_EOS;
1582                         return;
1583                 }
1584                 
1585                 inptr = priv->inptr;
1586                 inend = priv->inend;
1587                 /* Note: see optimization comment [1] */
1588                 *inend = '\n';
1589                 
1590                 while (*inptr != '\n')
1591                         inptr++;
1592                 
1593                 *found = check_boundary (priv, priv->inptr, inptr - priv->inptr);
1594                 switch (*found) {
1595                 case FOUND_END_BOUNDARY:
1596                         /* ignore "From " boundaries, boken mailers tend to include these lines... */
1597                         if (strncmp (priv->inptr, "From ", 5) != 0)
1598                                 return;
1599                         break;
1600                 case FOUND_BOUNDARY:
1601                         return;
1602                 }
1603         }
1604         
1605         /* get the headers */
1606         priv->state = GMIME_PARSER_STATE_HEADERS;
1607         if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
1608                 /* Note: currently cannot happen because
1609                  * parser_step_headers() never returns error */
1610                 *found = FOUND_EOS;
1611                 return;
1612         }
1613         
1614         message = g_mime_message_new (FALSE);
1615         header = priv->headers;
1616         while (header) {
1617                 if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0)
1618                         g_mime_object_append_header ((GMimeObject *) message, header->name, header->value);
1619                 header = header->next;
1620         }
1621         
1622         content_type = parser_content_type (parser);
1623         if (content_type_is_type (content_type, "multipart", "*"))
1624                 object = parser_construct_multipart (parser, content_type, TRUE, found);
1625         else
1626                 object = parser_construct_leaf_part (parser, content_type, TRUE, found);
1627         
1628         content_type_destroy (content_type);
1629         message->mime_part = object;
1630         
1631         /* set the same raw header stream on the message's header-list */
1632         if ((stream = g_mime_header_list_get_stream (object->headers)))
1633                 g_mime_header_list_set_stream (((GMimeObject *) message)->headers, stream);
1634         
1635         g_mime_message_part_set_message (mpart, message);
1636         g_object_unref (message);
1637 }
1638
1639 static GMimeObject *
1640 parser_construct_leaf_part (GMimeParser *parser, ContentType *content_type, gboolean toplevel, int *found)
1641 {
1642         struct _GMimeParserPrivate *priv = parser->priv;
1643         GMimeObject *object;
1644         GMimeStream *stream;
1645         HeaderRaw *header;
1646         
1647         g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
1648         
1649         object = g_mime_object_new_type (content_type->type, content_type->subtype);
1650         
1651         if (!content_type->exists) {
1652                 GMimeContentType *mime_type;
1653                 
1654                 mime_type = g_mime_content_type_new ("text", "plain");
1655                 _g_mime_object_set_content_type (object, mime_type);
1656                 g_object_unref (mime_type);
1657         }
1658         
1659         header = priv->headers;
1660         while (header) {
1661                 if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8))
1662                         g_mime_object_append_header (object, header->name, header->value);
1663                 header = header->next;
1664         }
1665         
1666         header_raw_clear (&priv->headers);
1667         
1668         /* set the raw header stream on the header-list */
1669         if (priv->persist_stream && priv->seekable)
1670                 stream = g_mime_stream_substream (priv->stream, priv->headers_begin, priv->headers_end);
1671         else
1672                 stream = g_mime_stream_mem_new_with_buffer (priv->rawbuf, priv->rawptr - priv->rawbuf);
1673         
1674         g_mime_header_list_set_stream (object->headers, stream);
1675         g_object_unref (stream);
1676         
1677         raw_header_reset (priv);
1678         
1679         if (priv->state == GMIME_PARSER_STATE_HEADERS_END) {
1680                 /* skip empty line after headers */
1681                 if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
1682                         *found = FOUND_EOS;
1683                         return object;
1684                 }
1685         }
1686         
1687         if (GMIME_IS_MESSAGE_PART (object))
1688                 parser_scan_message_part (parser, (GMimeMessagePart *) object, found);
1689         else
1690                 parser_scan_mime_part_content (parser, (GMimePart *) object, found);
1691         
1692         return object;
1693 }
1694
/*
 * crlf2lf:
 * @in: NUL-terminated string, rewritten in place
 *
 * Collapses every "\r\n" pair in @in to a single "\n" by dropping the
 * '\r'. A '\r' that is not immediately followed by '\n' is kept as-is.
 * The string only ever shrinks, so no extra storage is needed.
 */
static void
crlf2lf (char *in)
{
	char *src = in;
	char *dst;
	
	/* locate the first CRLF pair; if there is none, @in is already fine */
	for (;;) {
		if (*src == '\0')
			return;
		
		if (src[0] == '\r' && src[1] == '\n')
			break;
		
		src++;
	}
	
	/* dst stays on the '\r' that gets overwritten, src moves on to the '\n' */
	dst = src++;
	
	while (*src != '\0') {
		if (src[0] == '\r' && src[1] == '\n') {
			/* drop the '\r'; the '\n' is copied on the next pass */
			src++;
			continue;
		}
		
		*dst++ = *src++;
	}
	
	*dst = '\0';
}
1719
1720 static int
1721 parser_scan_multipart_face (GMimeParser *parser, GMimeMultipart *multipart, gboolean preface)
1722 {
1723         GByteArray *buffer;
1724         char *face;
1725         guint crlf;
1726         int found;
1727         
1728         buffer = g_byte_array_new ();
1729         found = parser_scan_content (parser, buffer, &crlf);
1730         
1731         if (buffer->len >= crlf) {
1732                 /* last '\n' belongs to the boundary */
1733                 g_byte_array_set_size (buffer, buffer->len + 1);
1734                 buffer->data[buffer->len - crlf - 1] = '\0';
1735                 face = (char *) buffer->data;
1736                 crlf2lf (face);
1737                 
1738                 if (preface)
1739                         g_mime_multipart_set_preface (multipart, face);
1740                 else
1741                         g_mime_multipart_set_postface (multipart, face);
1742         }
1743         
1744         g_byte_array_free (buffer, TRUE);
1745         
1746         return found;
1747 }
1748
1749 #define parser_scan_multipart_preface(parser, multipart) parser_scan_multipart_face (parser, multipart, TRUE)
1750 #define parser_scan_multipart_postface(parser, multipart) parser_scan_multipart_face (parser, multipart, FALSE)
1751
1752 static int
1753 parser_scan_multipart_subparts (GMimeParser *parser, GMimeMultipart *multipart)
1754 {
1755         struct _GMimeParserPrivate *priv = parser->priv;
1756         ContentType *content_type;
1757         GMimeObject *subpart;
1758         int found;
1759         
1760         do {
1761                 /* skip over the boundary marker */
1762                 if (parser_skip_line (parser) == -1) {
1763                         found = FOUND_EOS;
1764                         break;
1765                 }
1766                 
1767                 /* get the headers */
1768                 priv->state = GMIME_PARSER_STATE_HEADERS;
1769                 if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
1770                         found = FOUND_EOS;
1771                         break;
1772                 }
1773                 
1774                 if (priv->state == GMIME_PARSER_STATE_COMPLETE && priv->headers == NULL) {
1775                         found = FOUND_END_BOUNDARY;
1776                         break;
1777                 }
1778                 
1779                 content_type = parser_content_type (parser);
1780                 if (content_type_is_type (content_type, "multipart", "*"))
1781                         subpart = parser_construct_multipart (parser, content_type, FALSE, &found);
1782                 else
1783                         subpart = parser_construct_leaf_part (parser, content_type, FALSE, &found);
1784                 
1785                 g_mime_multipart_add (multipart, subpart);
1786                 content_type_destroy (content_type);
1787                 g_object_unref (subpart);
1788         } while (found == FOUND_BOUNDARY && found_immediate_boundary (priv, FALSE));
1789         
1790         return found;
1791 }
1792
1793 static GMimeObject *
1794 parser_construct_multipart (GMimeParser *parser, ContentType *content_type, gboolean toplevel, int *found)
1795 {
1796         struct _GMimeParserPrivate *priv = parser->priv;
1797         GMimeMultipart *multipart;
1798         const char *boundary;
1799         GMimeObject *object;
1800         GMimeStream *stream;
1801         HeaderRaw *header;
1802         
1803         g_assert (priv->state >= GMIME_PARSER_STATE_HEADERS_END);
1804         
1805         object = g_mime_object_new_type (content_type->type, content_type->subtype);
1806         
1807         header = priv->headers;
1808         while (header) {
1809                 if (!toplevel || !g_ascii_strncasecmp (header->name, "Content-", 8))
1810                         g_mime_object_append_header (object, header->name, header->value);
1811                 header = header->next;
1812         }
1813         
1814         header_raw_clear (&priv->headers);
1815         
1816         /* set the raw header stream on the header-list */
1817         if (priv->persist_stream && priv->seekable)
1818                 stream = g_mime_stream_substream (priv->stream, priv->headers_begin, priv->headers_end);
1819         else
1820                 stream = g_mime_stream_mem_new_with_buffer (priv->rawbuf, priv->rawptr - priv->rawbuf);
1821         
1822         g_mime_header_list_set_stream (object->headers, stream);
1823         g_object_unref (stream);
1824         
1825         raw_header_reset (priv);
1826         
1827         multipart = (GMimeMultipart *) object;
1828         
1829         if (priv->state == GMIME_PARSER_STATE_HEADERS_END) {
1830                 /* skip empty line after headers */
1831                 if (parser_step (parser) == GMIME_PARSER_STATE_ERROR) {
1832                         *found = FOUND_EOS;
1833                         return object;
1834                 }
1835         }
1836         
1837         boundary = g_mime_object_get_content_type_parameter (object, "boundary");
1838         if (boundary) {
1839                 parser_push_boundary (parser, boundary);
1840                 
1841                 *found = parser_scan_multipart_preface (parser, multipart);
1842                 
1843                 if (*found == FOUND_BOUNDARY)
1844                         *found = parser_scan_multipart_subparts (parser, multipart);
1845                 
1846                 if (*found == FOUND_END_BOUNDARY && found_immediate_boundary (priv, TRUE)) {
1847                         /* eat end boundary */
1848                         parser_skip_line (parser);
1849                         parser_pop_boundary (parser);
1850                         *found = parser_scan_multipart_postface (parser, multipart);
1851                 } else {
1852                         parser_pop_boundary (parser);
1853                 }
1854         } else {
1855                 w(g_warning ("multipart without boundary encountered"));
1856                 /* this will scan everything into the preface */
1857                 *found = parser_scan_multipart_preface (parser, multipart);
1858         }
1859         
1860         return object;
1861 }
1862
1863 static GMimeObject *
1864 parser_construct_part (GMimeParser *parser)
1865 {
1866         struct _GMimeParserPrivate *priv = parser->priv;
1867         ContentType *content_type;
1868         GMimeObject *object;
1869         int found;
1870         
1871         /* get the headers */
1872         priv->state = GMIME_PARSER_STATE_HEADERS;
1873         while (priv->state < GMIME_PARSER_STATE_HEADERS_END) {
1874                 if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
1875                         return NULL;
1876         }
1877         
1878         content_type = parser_content_type (parser);
1879         if (content_type_is_type (content_type, "multipart", "*"))
1880                 object = parser_construct_multipart (parser, content_type, TRUE, &found);
1881         else
1882                 object = parser_construct_leaf_part (parser, content_type, TRUE, &found);
1883         
1884         content_type_destroy (content_type);
1885         
1886         return object;
1887 }
1888
1889
1890 /**
1891  * g_mime_parser_construct_part:
1892  * @parser: a #GMimeParser context
1893  *
1894  * Constructs a MIME part from @parser.
1895  *
1896  * Returns: a MIME part based on @parser or %NULL on fail.
1897  **/
1898 GMimeObject *
1899 g_mime_parser_construct_part (GMimeParser *parser)
1900 {
1901         g_return_val_if_fail (GMIME_IS_PARSER (parser), NULL);
1902         
1903         return parser_construct_part (parser);
1904 }
1905
1906
1907 static GMimeMessage *
1908 parser_construct_message (GMimeParser *parser)
1909 {
1910         struct _GMimeParserPrivate *priv = parser->priv;
1911         unsigned long content_length = ULONG_MAX;
1912         ContentType *content_type;
1913         GMimeMessage *message;
1914         GMimeObject *object;
1915         GMimeStream *stream;
1916         HeaderRaw *header;
1917         char *endptr;
1918         int found;
1919         
1920         /* scan the from-line if we are parsing an mbox */
1921         while (priv->state != GMIME_PARSER_STATE_MESSAGE_HEADERS) {
1922                 if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
1923                         return NULL;
1924         }
1925         
1926         /* parse the headers */
1927         while (priv->state < GMIME_PARSER_STATE_HEADERS_END) {
1928                 if (parser_step (parser) == GMIME_PARSER_STATE_ERROR)
1929                         return NULL;
1930         }
1931         
1932         message = g_mime_message_new (FALSE);
1933         header = priv->headers;
1934         while (header) {
1935                 if (priv->respect_content_length && !g_ascii_strcasecmp (header->name, "Content-Length")) {
1936                         content_length = strtoul (header->value, &endptr, 10);
1937                         if (endptr == header->value)
1938                                 content_length = ULONG_MAX;
1939                 }
1940                 
1941                 if (g_ascii_strncasecmp (header->name, "Content-", 8) != 0)
1942                         g_mime_object_append_header ((GMimeObject *) message, header->name, header->value);
1943                 header = header->next;
1944         }
1945         
1946         if (priv->scan_from) {
1947                 parser_push_boundary (parser, MBOX_BOUNDARY);
1948                 if (priv->respect_content_length && content_length < ULONG_MAX)
1949                         priv->bounds->content_end = parser_offset (priv, NULL) + content_length;
1950         }
1951         
1952         content_type = parser_content_type (parser);
1953         if (content_type_is_type (content_type, "multipart", "*"))
1954                 object = parser_construct_multipart (parser, content_type, TRUE, &found);
1955         else
1956                 object = parser_construct_leaf_part (parser, content_type, TRUE, &found);
1957         
1958         content_type_destroy (content_type);
1959         message->mime_part = object;
1960         
1961         /* set the same raw header stream on the message's header-list */
1962         if ((stream = g_mime_header_list_get_stream (object->headers)))
1963                 g_mime_header_list_set_stream (((GMimeObject *) message)->headers, stream);
1964         
1965         if (priv->scan_from) {
1966                 priv->state = GMIME_PARSER_STATE_FROM;
1967                 parser_pop_boundary (parser);
1968         }
1969         
1970         return message;
1971 }
1972
1973
1974 /**
1975  * g_mime_parser_construct_message:
1976  * @parser: a #GMimeParser context
1977  *
1978  * Constructs a MIME message from @parser.
1979  *
1980  * Returns: a MIME message or %NULL on fail.
1981  **/
1982 GMimeMessage *
1983 g_mime_parser_construct_message (GMimeParser *parser)
1984 {
1985         g_return_val_if_fail (GMIME_IS_PARSER (parser), NULL);
1986         
1987         return parser_construct_message (parser);
1988 }
1989
1990
1991 /**
1992  * g_mime_parser_get_from:
1993  * @parser: a #GMimeParser context
1994  *
1995  * Gets the mbox-style From-line of the most recently parsed message
1996  * (gotten from g_mime_parser_construct_message()).
1997  *
1998  * Returns: the mbox-style From-line of the most recently parsed
1999  * message or %NULL on error.
2000  **/
2001 char *
2002 g_mime_parser_get_from (GMimeParser *parser)
2003 {
2004         struct _GMimeParserPrivate *priv;
2005         
2006         g_return_val_if_fail (GMIME_IS_PARSER (parser), NULL);
2007         
2008         priv = parser->priv;
2009         if (!priv->scan_from)
2010                 return NULL;
2011         
2012         if (priv->from_line->len)
2013                 return g_strndup ((char *) priv->from_line->data, priv->from_line->len);
2014         
2015         return NULL;
2016 }
2017
2018
2019 /**
2020  * g_mime_parser_get_from_offset:
2021  * @parser: a #GMimeParser context
2022  *
2023  * Gets the offset of the most recently parsed mbox-style From-line
2024  * (gotten from g_mime_parser_construct_message()).
2025  *
2026  * Returns: the offset of the most recently parsed mbox-style From-line
2027  * or %-1 on error.
2028  **/
2029 gint64
2030 g_mime_parser_get_from_offset (GMimeParser *parser)
2031 {
2032         struct _GMimeParserPrivate *priv;
2033         
2034         g_return_val_if_fail (GMIME_IS_PARSER (parser), -1);
2035         
2036         priv = parser->priv;
2037         if (!priv->scan_from)
2038                 return -1;
2039         
2040         return priv->from_offset;
2041 }
2042
2043
2044 /**
2045  * g_mime_parser_get_headers_begin:
2046  * @parser: a #GMimeParser context
2047  *
2048  * Gets the stream offset of the beginning of the headers of the most
2049  * recently parsed message.
2050  *
2051  * Returns: the offset of the beginning of the headers of the most
2052  * recently parsed message or %-1 on error.
2053  **/
2054 gint64
2055 g_mime_parser_get_headers_begin (GMimeParser *parser)
2056 {
2057         g_return_val_if_fail (GMIME_IS_PARSER (parser), -1);
2058         
2059         return parser->priv->message_headers_begin;
2060 }
2061
2062
2063 /**
2064  * g_mime_parser_get_headers_end:
2065  * @parser: a #GMimeParser context
2066  *
2067  * Gets the stream offset of the end of the headers of the most
2068  * recently parsed message.
2069  *
2070  * Returns: the offset of the end of the headers of the most recently
2071  * parsed message or %-1 on error.
2072  **/
2073 gint64
2074 g_mime_parser_get_headers_end (GMimeParser *parser)
2075 {
2076         g_return_val_if_fail (GMIME_IS_PARSER (parser), -1);
2077         
2078         return parser->priv->message_headers_end;
2079 }