Fix FSF address (Tobias Mueller, #470445)
[platform/upstream/evolution-data-server.git] / camel / camel-mime-parser.c
1 /* -*- Mode: C; tab-width: 8; indent-tabs-mode: t; c-basic-offset: 8 -*- */
2 /*
3  *  Copyright (C) 2000-2003 Ximian Inc.
4  *
5  *  Authors: Michael Zucchi <notzed@ximian.com>
6  *
7  * This program is free software; you can redistribute it and/or
8  * modify it under the terms of version 2 of the GNU Lesser General Public
9  * License as published by the Free Software Foundation.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14  * General Public License for more details.
15  *
16  * You should have received a copy of the GNU Lesser General Public
17  * License along with this program; if not, write to the
18  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19  * Boston, MA 02110-1301, USA.
20  */
21
22 /* What should hopefully be a fast mail parser */
23
24 /* Do not change this code without asking me (Michael Zucchi) first
25
26    There is almost always a reason something was done a certain way.
27  */
28
29 #include <errno.h>
30 #include <stdio.h>
31 #include <string.h>
32 #include <unistd.h>
33 #include <sys/stat.h>
34 #include <sys/types.h>
35
36 #include <glib.h>
37
38 #include <libedataserver/e-memory.h>
39
40 #include "camel-mime-filter.h"
41 #include "camel-mime-parser.h"
42 #include "camel-mime-utils.h"
43 #include "camel-private.h"
44 #include "camel-seekable-stream.h"
45 #include "camel-stream.h"
46
47 #define r(x) 
48 #define h(x) 
49 #define c(x) 
50 #define d(x) 
51
52 /*#define PRESERVE_HEADERS*/
53
54 /*#define PURIFY*/
55
56 #define MEMPOOL
57
58 #ifdef PURIFY
59 int inend_id = -1,
60   inbuffer_id = -1;
61 #endif
62
63 #define SCAN_BUF 4096           /* size of read buffer */
64 #define SCAN_HEAD 128           /* headroom guaranteed to be before each read buffer */
65
66 /* a little hacky, but i couldn't be bothered renaming everything */
67 #define _header_scan_state _CamelMimeParserPrivate
68 #define _PRIVATE(o) (((CamelMimeParser *)(o))->priv)
69
70 struct _header_scan_state {
71
72     /* global state */
73
74         camel_mime_parser_state_t state;
75
76         /* for building headers during scanning */
77         char *outbuf;
78         char *outptr;
79         char *outend;
80
81         int fd;                 /* input for a fd input */
82         CamelStream *stream;    /* or for a stream */
83
84         int ioerrno;            /* io error state */
85
86         /* for scanning input buffers */
87         char *realbuf;          /* the real buffer, SCAN_HEAD*2 + SCAN_BUF bytes */
88         char *inbuf;            /* points to a subset of the allocated memory, the underflow */
89         char *inptr;            /* (upto SCAN_HEAD) is for use by filters so they dont copy all data */
90         char *inend;
91
92         int atleast;
93
94         off_t seek;             /* current offset to start of buffer */
95         int unstep;             /* how many states to 'unstep' (repeat the current state) */
96
97         unsigned int midline:1;         /* are we mid-line interrupted? */
98         unsigned int scan_from:1;       /* do we care about From lines? */
99         unsigned int scan_pre_from:1;   /* do we return pre-from data? */
100         unsigned int eof:1;             /* reached eof? */
101
102         off_t start_of_from;    /* where from started */
103         off_t start_of_boundary; /* where the last boundary started */
104         off_t start_of_headers; /* where headers started from the last scan */
105
106         off_t header_start;     /* start of last header, or -1 */
107
108         /* filters to apply to all content before output */
109         int filterid;           /* id of next filter */
110         struct _header_scan_filter *filters;
111
112     /* per message/part info */
113         struct _header_scan_stack *parts;
114
115 };
116
117 struct _header_scan_stack {
118         struct _header_scan_stack *parent;
119
120         camel_mime_parser_state_t savestate; /* state at invocation of this part */
121
122 #ifdef MEMPOOL
123         EMemPool *pool;         /* memory pool to keep track of headers/etc at this level */
124 #endif
125         struct _camel_header_raw *headers;      /* headers for this part */
126
127         CamelContentType *content_type;
128
129         /* I dont use GString's casue you can't efficiently append a buffer to them */
130         GByteArray *pretext;    /* for multipart types, save the pre-boundary data here */
131         GByteArray *posttext;   /* for multipart types, save the post-boundary data here */
132         int prestage;           /* used to determine if it is a pre-boundary or post-boundary data segment */
133
134         GByteArray *from_line;  /* the from line */
135
136         char *boundary;         /* for multipart/ * boundaries, including leading -- and trailing -- for the final part */
137         int boundarylen;        /* actual length of boundary, including leading -- if there is one */
138         int boundarylenfinal;   /* length of boundary, including trailing -- if there is one */
139         int atleast;            /* the biggest boundary from here to the parent */
140 };
141
142 struct _header_scan_filter {
143         struct _header_scan_filter *next;
144         int id;
145         CamelMimeFilter *filter;
146 };
147
148 static void folder_scan_step(struct _header_scan_state *s, char **databuffer, size_t *datalength);
149 static void folder_scan_drop_step(struct _header_scan_state *s);
150 static int folder_scan_init_with_fd(struct _header_scan_state *s, int fd);
151 static int folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream);
152 static struct _header_scan_state *folder_scan_init(void);
153 static void folder_scan_close(struct _header_scan_state *s);
154 static struct _header_scan_stack *folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, size_t *length);
155 static struct _header_scan_stack *folder_scan_header(struct _header_scan_state *s, int *lastone);
156 static int folder_scan_skip_line(struct _header_scan_state *s, GByteArray *save);
157 static off_t folder_seek(struct _header_scan_state *s, off_t offset, int whence);
158 static off_t folder_tell(struct _header_scan_state *s);
159 static int folder_read(struct _header_scan_state *s);
160 static void folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h);
161
162 #ifdef MEMPOOL
163 static void header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset);
164 #endif
165
166 static void camel_mime_parser_class_init (CamelMimeParserClass *klass);
167 static void camel_mime_parser_init       (CamelMimeParser *obj);
168
169 #if d(!)0
170 static char *states[] = {
171         "CAMEL_MIME_PARSER_STATE_INITIAL",
172         "CAMEL_MIME_PARSER_STATE_PRE_FROM",     /* pre-from data */
173         "CAMEL_MIME_PARSER_STATE_FROM",         /* got 'From' line */
174         "CAMEL_MIME_PARSER_STATE_HEADER",               /* toplevel header */
175         "CAMEL_MIME_PARSER_STATE_BODY",         /* scanning body of message */
176         "CAMEL_MIME_PARSER_STATE_MULTIPART",    /* got multipart header */
177         "CAMEL_MIME_PARSER_STATE_MESSAGE",      /* rfc822/news message */
178
179         "CAMEL_MIME_PARSER_STATE_PART",         /* part of a multipart */
180
181         "CAMEL_MIME_PARSER_STATE_EOF",          /* end of file */
182         "CAMEL_MIME_PARSER_STATE_PRE_FROM_END",
183         "CAMEL_MIME_PARSER_STATE_FROM_END",
184         "CAMEL_MIME_PARSER_STATE_HEAER_END",
185         "CAMEL_MIME_PARSER_STATE_BODY_END",
186         "CAMEL_MIME_PARSER_STATE_MULTIPART_END",
187         "CAMEL_MIME_PARSER_STATE_MESSAGE_END",
188 };
189 #endif
190
191 static CamelObjectClass *camel_mime_parser_parent;
192
193 static void
194 camel_mime_parser_class_init (CamelMimeParserClass *klass)
195 {
196         camel_mime_parser_parent = camel_type_get_global_classfuncs (camel_object_get_type ());
197 }
198
199 static void
200 camel_mime_parser_init (CamelMimeParser *obj)
201 {
202         struct _header_scan_state *s;
203
204         s = folder_scan_init();
205         _PRIVATE(obj) = s;
206 }
207
208 static void
209 camel_mime_parser_finalise(CamelObject *o)
210 {
211         struct _header_scan_state *s = _PRIVATE(o);
212 #ifdef PURIFY
213         purify_watch_remove_all();
214 #endif
215         folder_scan_close(s);
216 }
217
218 CamelType
219 camel_mime_parser_get_type (void)
220 {
221         static CamelType type = CAMEL_INVALID_TYPE;
222         
223         if (type == CAMEL_INVALID_TYPE) {
224                 type = camel_type_register (camel_object_get_type (), "CamelMimeParser",
225                                             sizeof (CamelMimeParser),
226                                             sizeof (CamelMimeParserClass),
227                                             (CamelObjectClassInitFunc) camel_mime_parser_class_init,
228                                             NULL,
229                                             (CamelObjectInitFunc) camel_mime_parser_init,
230                                             (CamelObjectFinalizeFunc) camel_mime_parser_finalise);
231         }
232         
233         return type;
234 }
235
236 /**
237  * camel_mime_parser_new:
238  *
239  * Create a new CamelMimeParser object.
240  * 
241  * Return value: A new CamelMimeParser widget.
242  **/
243 CamelMimeParser *
244 camel_mime_parser_new (void)
245 {
246         CamelMimeParser *new = CAMEL_MIME_PARSER ( camel_object_new (camel_mime_parser_get_type ()));
247         return new;
248 }
249
250
251 /**
252  * camel_mime_parser_filter_add:
253  * @m: 
254  * @mf: 
255  * 
256  * Add a filter that will be applied to any body content before it is passed
257  * to the caller.  Filters may be pipelined to perform multi-pass operations
258  * on the content, and are applied in the order they were added.
259  *
260  * Note that filters are only applied to the body content of messages, and once
261  * a filter has been set, all content returned by a filter_step() with a state
262  * of CAMEL_MIME_PARSER_STATE_BODY will have passed through the filter.
263  * 
264  * Return value: An id that may be passed to filter_remove() to remove
265  * the filter, or -1 if the operation failed.
266  **/
267 int
268 camel_mime_parser_filter_add(CamelMimeParser *m, CamelMimeFilter *mf)
269 {
270         struct _header_scan_state *s = _PRIVATE(m);
271         struct _header_scan_filter *f, *new;
272
273         new = g_malloc(sizeof(*new));
274         new->filter = mf;
275         new->id = s->filterid++;
276         if (s->filterid == -1)
277                 s->filterid++;
278         new->next = 0;
279         camel_object_ref((CamelObject *)mf);
280
281         /* yes, this is correct, since 'next' is the first element of the struct */
282         f = (struct _header_scan_filter *)&s->filters;
283         while (f->next)
284                 f = f->next;
285         f->next = new;
286         return new->id;
287 }
288
289 /**
290  * camel_mime_parser_filter_remove:
291  * @m: 
292  * @id: 
293  * 
294  * Remove a processing filter from the pipeline.  There is no
295  * restriction on the order the filters can be removed.
296  **/
297 void
298 camel_mime_parser_filter_remove(CamelMimeParser *m, int id)
299 {
300         struct _header_scan_state *s = _PRIVATE(m);
301         struct _header_scan_filter *f, *old;
302         
303         f = (struct _header_scan_filter *)&s->filters;
304         while (f && f->next) {
305                 old = f->next;
306                 if (old->id == id) {
307                         camel_object_unref((CamelObject *)old->filter);
308                         f->next = old->next;
309                         g_free(old);
310                         /* there should only be a single matching id, but
311                            scan the whole lot anyway */
312                 }
313                 f = f->next;
314         }
315 }
316
317 /**
318  * camel_mime_parser_header:
319  * @m: 
320  * @name: Name of header.
321  * @offset: Pointer that can receive the offset of the header in
322  * the stream from the start of parsing.
323  * 
324  * Lookup a header by name.
325  * 
326  * Return value: The header value, or NULL if the header is not
327  * defined.
328  **/
329 const char *
330 camel_mime_parser_header(CamelMimeParser *m, const char *name, int *offset)
331 {
332         struct _header_scan_state *s = _PRIVATE(m);
333
334         if (s->parts &&
335             s->parts->headers) {
336                 return camel_header_raw_find(&s->parts->headers, name, offset);
337         }
338         return NULL;
339 }
340
341 /**
342  * camel_mime_parser_headers_raw:
343  * @m: 
344  * 
345  * Get the list of the raw headers which are defined for the
346  * current state of the parser.  These headers are valid
347  * until the next call to parser_step(), or parser_drop_step().
348  * 
349  * Return value: The raw headers, or NULL if there are no headers
350  * defined for the current part or state.  These are READ ONLY.
351  **/
352 struct _camel_header_raw *
353 camel_mime_parser_headers_raw(CamelMimeParser *m)
354 {
355         struct _header_scan_state *s = _PRIVATE(m);
356
357         if (s->parts)
358                 return s->parts->headers;
359         return NULL;
360 }
361
362 static const char *
363 byte_array_to_string(GByteArray *array)
364 {
365         if (array == NULL)
366                 return NULL;
367
368         if (array->len == 0 || array->data[array->len-1] != '\0')
369                 g_byte_array_append(array, "", 1);
370
371         return (const char *) array->data;
372 }
373
374 /**
375  * camel_mime_parser_preface:
376  * @m: 
377  * 
378  * Retrieve the preface text for the current multipart.
379  * Can only be used when the state is CAMEL_MIME_PARSER_STATE_MULTIPART_END.
380  * 
381  * Return value: The preface text, or NULL if there wasn't any.
382  **/
383 const char *
384 camel_mime_parser_preface(CamelMimeParser *m)
385 {
386         struct _header_scan_state *s = _PRIVATE(m);
387
388         if (s->parts)
389                 return byte_array_to_string(s->parts->pretext);
390
391         return NULL;
392 }
393
394 /**
395  * camel_mime_parser_postface:
396  * @m: 
397  * 
398  * Retrieve the postface text for the current multipart.
399  * Only returns valid data when the current state if
400  * CAMEL_MIME_PARSER_STATE_MULTIPART_END.
401  * 
402  * Return value: The postface text, or NULL if there wasn't any.
403  **/
404 const char *
405 camel_mime_parser_postface(CamelMimeParser *m)
406 {
407         struct _header_scan_state *s = _PRIVATE(m);
408
409         if (s->parts)
410                 return byte_array_to_string(s->parts->posttext);
411
412         return NULL;
413 }
414
415 /**
416  * camel_mime_parser_from_line:
417  * @m: 
418  * 
419  * Get the last scanned "From " line, from a recently scanned from.
420  * This should only be called in the CAMEL_MIME_PARSER_STATE_FROM state.  The
421  * from line will include the closing \n found (if there was one).
422  *
423  * The return value will remain valid while in the CAMEL_MIME_PARSER_STATE_FROM
424  * state, or any deeper state.
425  * 
426  * Return value: The From line, or NULL if called out of context.
427  **/
428 const char *
429 camel_mime_parser_from_line(CamelMimeParser *m)
430 {
431         struct _header_scan_state *s = _PRIVATE(m);
432
433         if (s->parts)
434                 return byte_array_to_string(s->parts->from_line);
435
436         return NULL;
437 }
438
439 /**
440  * camel_mime_parser_init_with_fd:
441  * @m: 
442  * @fd: A valid file descriptor.
443  * 
444  * Initialise the scanner with an fd.  The scanner's offsets
445  * will be relative to the current file position of the file
446  * descriptor.  As a result, seekable descritors should
447  * be seeked using the parser seek functions.
448  *
449  * Return value: Returns -1 on error.
450  **/
451 int
452 camel_mime_parser_init_with_fd(CamelMimeParser *m, int fd)
453 {
454         struct _header_scan_state *s = _PRIVATE(m);
455
456         return folder_scan_init_with_fd(s, fd);
457 }
458
459 /**
460  * camel_mime_parser_init_with_stream:
461  * @m: 
462  * @stream: 
463  * 
464  * Initialise the scanner with a source stream.  The scanner's
465  * offsets will be relative to the current file position of
466  * the stream.  As a result, seekable streams should only
467  * be seeked using the parser seek function.
468  * 
469  * Return value: -1 on error.
470  **/
471 int
472 camel_mime_parser_init_with_stream(CamelMimeParser *m, CamelStream *stream)
473 {
474         struct _header_scan_state *s = _PRIVATE(m);
475
476         return folder_scan_init_with_stream(s, stream);
477 }
478
479 /**
480  * camel_mime_parser_scan_from:
481  * @parser: MIME parser object
482  * @scan_from: #TRUE if the scanner should scan From lines.
483  * 
484  * Tell the scanner if it should scan "^From " lines or not.
485  *
486  * If the scanner is scanning from lines, two additional
487  * states CAMEL_MIME_PARSER_STATE_FROM and CAMEL_MIME_PARSER_STATE_FROM_END will be returned
488  * to the caller during parsing.
489  *
490  * This may also be preceeded by an optional
491  * CAMEL_MIME_PARSER_STATE_PRE_FROM state which contains the scanned data
492  * found before the From line is encountered.  See also
493  * scan_pre_from().
494  **/
495 void
496 camel_mime_parser_scan_from (CamelMimeParser *parser, gboolean scan_from)
497 {
498         struct _header_scan_state *s = _PRIVATE (parser);
499         
500         s->scan_from = scan_from;
501 }
502
503 /**
504  * camel_mime_parser_scan_pre_from:
505  * @parser: MIME parser object
506  * @scan_pre_from: #TRUE if we want to get pre-from data.
507  * 
508  * Tell the scanner whether we want to know abou the pre-from
509  * data during a scan.  If we do, then we may get an additional
510  * state CAMEL_MIME_PARSER_STATE_PRE_FROM which returns the specified data.
511  **/
512 void
513 camel_mime_parser_scan_pre_from (CamelMimeParser *parser, gboolean scan_pre_from)
514 {
515         struct _header_scan_state *s = _PRIVATE (parser);
516         
517         s->scan_pre_from = scan_pre_from;
518 }
519
520 /**
521  * camel_mime_parser_content_type:
522  * @parser: MIME parser object
523  * 
524  * Get the content type defined in the current part.
525  * 
526  * Return value: A content_type structure, or NULL if there
527  * is no content-type defined for this part of state of the
528  * parser.
529  **/
530 CamelContentType *
531 camel_mime_parser_content_type (CamelMimeParser *parser)
532 {
533         struct _header_scan_state *s = _PRIVATE (parser);
534         
535         /* FIXME: should this search up until it's found the 'right'
536            content-type?  can it? */
537         if (s->parts)
538                 return s->parts->content_type;
539         
540         return NULL;
541 }
542
543 /**
544  * camel_mime_parser_unstep:
545  * @parser: MIME parser object
546  * 
547  * Cause the last step operation to repeat itself.  If this is 
548  * called repeated times, then the same step will be repeated
549  * that many times.
550  *
551  * Note that it is not possible to scan back using this function,
552  * only to have a way of peeking the next state.
553  **/
554 void
555 camel_mime_parser_unstep (CamelMimeParser *parser)
556 {
557         struct _header_scan_state *s = _PRIVATE (parser);
558         
559         s->unstep++;
560 }
561
562 /**
563  * camel_mime_parser_drop_step:
564  * @parser: MIME parser object
565  * 
566  * Drop the last step call.  This should only be used
567  * in conjunction with seeking of the stream as the
568  * stream may be in an undefined state relative to the
569  * state of the parser.
570  *
571  * Use this call with care.
572  **/
573 void
574 camel_mime_parser_drop_step (CamelMimeParser *parser)
575 {
576         struct _header_scan_state *s = _PRIVATE (parser);
577         
578         s->unstep = 0;
579         folder_scan_drop_step(s);
580 }
581
582 /**
583  * camel_mime_parser_step:
584  * @parser: MIME parser object 
585  * @databuffer: Pointer to accept a pointer to the data
586  * associated with this step (if any).  May be #NULL,
587  * in which case datalength is also ingored.
588  * @datalength: Pointer to accept a pointer to the data
589  * length associated with this step (if any).
590  * 
591  * Parse the next part of the MIME message.  If _unstep()
592  * has been called, then continue to return the same state
593  * for that many calls.
594  *
595  * If the step is CAMEL_MIME_PARSER_STATE_BODY then the databuffer and datalength
596  * pointers will be setup to point to the internal data buffer
597  * of the scanner and may be processed as required.  Any
598  * filters will have already been applied to this data.
599  *
600  * Refer to the state diagram elsewhere for a full listing of
601  * the states an application is gauranteed to get from the
602  * scanner.
603  *
604  * Return value: The current new state of the parser
605  * is returned.
606  **/
607 camel_mime_parser_state_t
608 camel_mime_parser_step (CamelMimeParser *parser, char **databuffer, size_t *datalength)
609 {
610         struct _header_scan_state *s = _PRIVATE (parser);
611
612         d(printf("OLD STATE:  '%s' :\n", states[s->state]));
613
614         if (s->unstep <= 0) {
615                 char *dummy;
616                 size_t dummylength;
617
618                 if (databuffer == NULL) {
619                         databuffer = &dummy;
620                         datalength = &dummylength;
621                 }
622                         
623                 folder_scan_step(s, databuffer, datalength);
624         } else
625                 s->unstep--;
626
627         d(printf("NEW STATE:  '%s' :\n", states[s->state]));
628
629         return s->state;
630 }
631
632 /**
633  * camel_mime_parser_read:
634  * @parser: MIME parser object
635  * @databuffer: 
636  * @len: 
637  * 
638  * Read at most @len bytes from the internal mime parser buffer.
639  *
640  * Returns the address of the internal buffer in @databuffer,
641  * and the length of useful data.
642  *
643  * @len may be specified as INT_MAX, in which case you will
644  * get the full remainder of the buffer at each call.
645  *
646  * Note that no parsing of the data read through this function
647  * occurs, so no state changes occur, but the seek position
648  * is updated appropriately.
649  *
650  * Return value: The number of bytes available, or -1 on error.
651  **/
652 int
653 camel_mime_parser_read (CamelMimeParser *parser, const char **databuffer, int len)
654 {
655         struct _header_scan_state *s = _PRIVATE (parser);
656         int there;
657
658         if (len == 0)
659                 return 0;
660
661         d(printf("parser::read() reading %d bytes\n", len));
662
663         there = MIN(s->inend - s->inptr, len);
664         d(printf("parser::read() there = %d bytes\n", there));
665         if (there > 0) {
666                 *databuffer = s->inptr;
667                 s->inptr += there;
668                 return there;
669         }
670
671         if (folder_read(s) == -1)
672                 return -1;
673
674         there = MIN(s->inend - s->inptr, len);
675         d(printf("parser::read() had to re-read, now there = %d bytes\n", there));
676
677         *databuffer = s->inptr;
678         s->inptr += there;
679
680         return there;
681 }
682
683 /**
684  * camel_mime_parser_tell:
685  * @parser: MIME parser object
686  * 
687  * Return the current scanning offset.  The meaning of this
688  * value will depend on the current state of the parser.
689  *
690  * An incomplete listing of the states:
691  *
692  * CAMEL_MIME_PARSER_STATE_INITIAL, The start of the current message.
693  * CAMEL_MIME_PARSER_STATE_HEADER, CAMEL_MIME_PARSER_STATE_MESSAGE, CAMEL_MIME_PARSER_STATE_MULTIPART, the character
694  * position immediately after the end of the header.
695  * CAMEL_MIME_PARSER_STATE_BODY, Position within the message of the start
696  * of the current data block.
697  * CAMEL_MIME_PARSER_STATE_*_END, The position of the character starting
698  * the next section of the scan (the last position + 1 of
699  * the respective current state).
700  * 
701  * Return value: See above.
702  **/
703 off_t
704 camel_mime_parser_tell (CamelMimeParser *parser)
705 {
706         struct _header_scan_state *s = _PRIVATE (parser);
707
708         return folder_tell(s);
709 }
710
711 /**
712  * camel_mime_parser_tell_start_headers:
713  * @parser: MIME parser object
714  * 
715  * Find out the position within the file of where the
716  * headers started, this is cached by the parser
717  * at the time.
718  * 
719  * Return value: The header start position, or -1 if
720  * no headers were scanned in the current state.
721  **/
722 off_t
723 camel_mime_parser_tell_start_headers (CamelMimeParser *parser)
724 {
725         struct _header_scan_state *s = _PRIVATE (parser);
726
727         return s->start_of_headers;
728 }
729
730 /**
731  * camel_mime_parser_tell_start_from:
732  * @parser: MIME parser object
733  * 
734  * If the parser is scanning From lines, then this returns
735  * the position of the start of the From line.
736  * 
737  * Return value: The start of the from line, or -1 if there
738  * was no From line, or From lines are not being scanned.
739  **/
740 off_t
741 camel_mime_parser_tell_start_from (CamelMimeParser *parser)
742 {
743         struct _header_scan_state *s = _PRIVATE (parser);
744
745         return s->start_of_from;
746 }
747
748 /**
749  * camel_mime_parser_tell_start_boundary:
750  * @parser: MIME parser object
751  * 
752  * When parsing a multipart, this returns the start of the last
753  * boundary.
754  * 
755  * Return value: The start of the boundary, or -1 if there
756  * was no boundary encountered yet.
757  **/
758 off_t
759 camel_mime_parser_tell_start_boundary(CamelMimeParser *parser)
760 {
761         struct _header_scan_state *s = _PRIVATE (parser);
762
763         return s->start_of_boundary;
764 }
765
766 /**
767  * camel_mime_parser_seek:
768  * @parser: MIME parser object
769  * @offset: Number of bytes to offset the seek by.
770  * @whence: SEEK_SET, SEEK_CUR, SEEK_END
771  * 
772  * Reset the source position to a known value.
773  *
774  * Note that if the source stream/descriptor was not
775  * positioned at 0 to begin with, and an absolute seek
776  * is specified (whence != SEEK_CUR), then the seek
777  * position may not match the desired seek position.
778  * 
779  * Return value: The new seek offset, or -1 on
780  * an error (for example, trying to seek on a non-seekable
781  * stream or file descriptor).
782  **/
783 off_t
784 camel_mime_parser_seek(CamelMimeParser *parser, off_t offset, int whence)
785 {
786         struct _header_scan_state *s = _PRIVATE (parser);
787         
788         return folder_seek(s, offset, whence);
789 }
790
791 /**
792  * camel_mime_parser_state:
793  * @parser: MIME parser object
794  * 
795  * Get the current parser state.
796  * 
797  * Return value: The current parser state.
798  **/
799 camel_mime_parser_state_t
800 camel_mime_parser_state (CamelMimeParser *parser)
801 {
802         struct _header_scan_state *s = _PRIVATE (parser);
803         
804         return s->state;
805 }
806
807 /**
808  * camel_mime_parser_push_state:
809  * @mp: MIME parser object
810  * @newstate: New state
811  * @boundary: Boundary marker for state.
812  * 
813  * Pre-load a new parser state.  Used to post-parse multipart content
814  * without headers.
815  **/
816 void
817 camel_mime_parser_push_state(CamelMimeParser *mp, camel_mime_parser_state_t newstate, const char *boundary)
818 {
819         struct _header_scan_stack *h;
820         struct _header_scan_state *s = _PRIVATE(mp);
821
822         h = g_malloc0(sizeof(*h));
823         h->boundarylen = strlen(boundary)+2;
824         h->boundarylenfinal = h->boundarylen+2;
825         h->boundary = g_malloc(h->boundarylen+3);
826         sprintf(h->boundary, "--%s--", boundary);
827         folder_push_part(s, h);
828         s->state = newstate;
829 }
830
831 /**
832  * camel_mime_parser_stream:
833  * @parser: MIME parser object
834  * 
835  * Get the stream, if any, the parser has been initialised
836  * with.  May be used to setup sub-streams, but should not
837  * be read from directly (without saving and restoring
838  * the seek position in between).
839  * 
840  * Return value: The stream from _init_with_stream(), or NULL
841  * if the parser is reading from a file descriptor or is
842  * uninitialised.
843  **/
844 CamelStream *
845 camel_mime_parser_stream (CamelMimeParser *parser)
846 {
847         struct _header_scan_state *s = _PRIVATE (parser);
848         
849         return s->stream;
850 }
851
852 /**
853  * camel_mime_parser_fd:
854  * @parser: MIME parser object
855  * 
856  * Return the file descriptor, if any, the parser has been
857  * initialised with.
858  *
859  * Should not be read from unless the parser it to terminate,
860  * or the seek offset can be reset before the next parse
861  * step.
862  * 
863  * Return value: The file descriptor or -1 if the parser
864  * is reading from a stream or has not been initialised.
865  **/
866 int
867 camel_mime_parser_fd (CamelMimeParser *parser)
868 {
869         struct _header_scan_state *s = _PRIVATE (parser);
870         
871         return s->fd;
872 }
873
874 /* Return errno of the parser, incase any error occured during processing */
875 int
876 camel_mime_parser_errno (CamelMimeParser *parser)
877 {
878         struct _header_scan_state *s = _PRIVATE (parser);
879         
880         return s->ioerrno;
881 }
882
883 /* ********************************************************************** */
884 /*    Implementation                                                      */
885 /* ********************************************************************** */
886
887 /* read the next bit of data, ensure there is enough room 'atleast' bytes */
888 static int
889 folder_read(struct _header_scan_state *s)
890 {
891         int len;
892         int inoffset;
893
894         if (s->inptr<s->inend-s->atleast || s->eof)
895                 return s->inend-s->inptr;
896 #ifdef PURIFY
897         purify_watch_remove(inend_id);
898         purify_watch_remove(inbuffer_id);
899 #endif
900         /* check for any remaning bytes (under the atleast limit( */
901         inoffset = s->inend - s->inptr;
902         if (inoffset>0) {
903                 memmove(s->inbuf, s->inptr, inoffset);
904         }
905         if (s->stream) {
906                 len = camel_stream_read(s->stream, s->inbuf+inoffset, SCAN_BUF-inoffset);
907         } else {
908                 len = read(s->fd, s->inbuf+inoffset, SCAN_BUF-inoffset);
909         }
910         r(printf("read %d bytes, offset = %d\n", len, inoffset));
911         if (len>=0) {
912                 /* add on the last read block */
913                 s->seek += s->inptr - s->inbuf;
914                 s->inptr = s->inbuf;
915                 s->inend = s->inbuf+len+inoffset;
916                 s->eof = (len == 0);
917                 r(printf("content = %d '%.*s'\n",s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
918         } else {
919                 s->ioerrno = errno?errno:EIO;
920         }
921
922         g_assert(s->inptr<=s->inend);
923 #ifdef PURIFY
924         inend_id = purify_watch(&s->inend);
925         inbuffer_id = purify_watch_n(s->inend+1, SCAN_HEAD-1, "rw");
926 #endif
927         r(printf("content = %d '%.*s'\n", s->inend - s->inptr,  s->inend - s->inptr, s->inptr));
928         /* set a sentinal, for the inner loops to check against */
929         s->inend[0] = '\n';
930         return s->inend-s->inptr;
931 }
932
933 /* return the current absolute position of the data pointer */
934 static off_t
935 folder_tell(struct _header_scan_state *s)
936 {
937         return s->seek + (s->inptr - s->inbuf);
938 }
939
940 /*
941   need some way to prime the parser state, so this actually works for 
942   other than top-level messages
943 */
944 static off_t
945 folder_seek(struct _header_scan_state *s, off_t offset, int whence)
946 {
947         off_t newoffset;
948
949         if (s->stream) {
950                 if (CAMEL_IS_SEEKABLE_STREAM(s->stream)) {
951                         /* NOTE: assumes whence seekable stream == whence libc, which is probably
952                            the case (or bloody well should've been) */
953                         newoffset = camel_seekable_stream_seek((CamelSeekableStream *)s->stream, offset, whence);
954                 } else {
955                         newoffset = -1;
956                         errno = EINVAL;
957                 }
958         } else {
959                 newoffset = lseek(s->fd, offset, whence);
960         }
961 #ifdef PURIFY
962         purify_watch_remove(inend_id);
963         purify_watch_remove(inbuffer_id);
964 #endif
965         if (newoffset != -1) {
966                 s->seek = newoffset;
967                 s->inptr = s->inbuf;
968                 s->inend = s->inbuf;
969                 s->eof = FALSE;
970         } else {
971                 s->ioerrno = errno?errno:EIO;
972         }
973 #ifdef PURIFY
974         inend_id = purify_watch(&s->inend);
975         inbuffer_id = purify_watch_n(s->inend+1, SCAN_HEAD-1, "rw");
976 #endif
977         return newoffset;
978 }
979
980 static void
981 folder_push_part(struct _header_scan_state *s, struct _header_scan_stack *h)
982 {
983         if (s->parts && s->parts->atleast > h->boundarylenfinal)
984                 h->atleast = s->parts->atleast;
985         else
986                 h->atleast = MAX(h->boundarylenfinal, 1);
987
988         h->parent = s->parts;
989         s->parts = h;
990 }
991
992 static void
993 folder_pull_part(struct _header_scan_state *s)
994 {
995         struct _header_scan_stack *h;
996
997         h = s->parts;
998         if (h) {
999                 s->parts = h->parent;
1000                 g_free(h->boundary);
1001 #ifdef MEMPOOL
1002                 e_mempool_destroy(h->pool);
1003 #else
1004                 camel_header_raw_clear(&h->headers);
1005 #endif
1006                 camel_content_type_unref(h->content_type);
1007                 if (h->pretext)
1008                         g_byte_array_free(h->pretext, TRUE);
1009                 if (h->posttext)
1010                         g_byte_array_free(h->posttext, TRUE);
1011                 if (h->from_line)
1012                         g_byte_array_free(h->from_line, TRUE);
1013                 g_free(h);
1014         } else {
1015                 g_warning("Header stack underflow!\n");
1016         }
1017 }
1018
1019 static int
1020 folder_scan_skip_line(struct _header_scan_state *s, GByteArray *save)
1021 {
1022         int atleast = s->atleast;
1023         register char *inptr, *inend, c;
1024         int len;
1025
1026         s->atleast = 1;
1027
1028         d(printf("skipping line\n"));
1029
1030         while ( (len = folder_read(s)) > 0 && len > s->atleast) { /* ensure we have at least enough room here */
1031                 inptr = s->inptr;
1032                 inend = s->inend;
1033
1034                 c = -1;
1035                 while (inptr<inend
1036                        && (c = *inptr++)!='\n') {
1037                         d(printf("(%2x,%c)", c, isprint(c)?c:'.'));
1038                         ;
1039                 }
1040
1041                 if (save)
1042                         g_byte_array_append(save, s->inptr, inptr-s->inptr);
1043
1044                 s->inptr = inptr;
1045
1046                 if (c=='\n') {
1047                         s->atleast = atleast;
1048                         return 0;
1049                 }
1050         }
1051
1052         d(printf("couldn't find end of line?\n"));
1053
1054         s->atleast = atleast;
1055
1056         return -1;              /* not found */
1057 }
1058
1059 /* TODO: Is there any way to make this run faster?  It gets called a lot ... */
1060 static struct _header_scan_stack *
1061 folder_boundary_check(struct _header_scan_state *s, const char *boundary, int *lastone)
1062 {
1063         struct _header_scan_stack *part;
1064         int len = s->inend - boundary; /* make sure we dont access past the buffer */
1065
1066         h(printf("checking boundary marker upto %d bytes\n", len));
1067         part = s->parts;
1068         while (part) {
1069                 h(printf("  boundary: %s\n", part->boundary));
1070                 h(printf("   against: '%.*s'\n", part->boundarylen, boundary));
1071                 if (part->boundary
1072                     && part->boundarylen <= len
1073                     && memcmp(boundary, part->boundary, part->boundarylen)==0) {
1074                         h(printf("matched boundary: %s\n", part->boundary));
1075                         /* again, make sure we're in range */
1076                         if (part->boundarylenfinal <= len) {
1077                                 int extra = part->boundarylenfinal - part->boundarylen;
1078                                 
1079                                 /* check the extra stuff on an final boundary, normally -- for mime parts */
1080                                 if (extra>0) {
1081                                         *lastone = memcmp(&boundary[part->boundarylen],
1082                                                           &part->boundary[part->boundarylen],
1083                                                           extra) == 0;
1084                                 } else {
1085                                         *lastone = TRUE;
1086                                 }
1087                                 h(printf("checking lastone = %s\n", *lastone?"TRUE":"FALSE"));
1088                         } else {
1089                                 h(printf("not enough room to check last one?\n"));
1090                                 *lastone = FALSE;
1091                         }
1092                         /*printf("ok, we found it! : %s \n", (*lastone)?"Last one":"More to come?");*/
1093                         return part;
1094                 }
1095                 part = part->parent;
1096         }
1097         return NULL;
1098 }
1099
1100 #ifdef MEMPOOL
1101 static void
1102 header_append_mempool(struct _header_scan_state *s, struct _header_scan_stack *h, char *header, int offset)
1103 {
1104         struct _camel_header_raw *l, *n;
1105         char *content;
1106         
1107         content = strchr(header, ':');
1108         if (content) {
1109                 register int len;
1110                 n = e_mempool_alloc(h->pool, sizeof(*n));
1111                 n->next = NULL;
1112                 
1113                 len = content-header;
1114                 n->name = e_mempool_alloc(h->pool, len+1);
1115                 memcpy(n->name, header, len);
1116                 n->name[len] = 0;
1117                 
1118                 content++;
1119                 
1120                 len = s->outptr - content;
1121                 n->value = e_mempool_alloc(h->pool, len+1);
1122                 memcpy(n->value, content, len);
1123                 n->value[len] = 0;
1124                 
1125                 n->offset = offset;
1126                 
1127                 l = (struct _camel_header_raw *)&h->headers;
1128                 while (l->next) {
1129                         l = l->next;
1130                 }
1131                 l->next = n;
1132         }
1133         
1134 }
1135
1136 #define header_raw_append_parse(a, b, c) (header_append_mempool(s, h, b, c))
1137
1138 #endif
1139
/* Copy the string start->inptr into the header buffer (s->outbuf),
   grow if necessary,
   remove one trailing \r char (\n's assumed already removed)
   and track the start offset of the header: s->header_start is set from
   `start' only while it is still -1.

   The buffer is grown whenever the new data would fill it completely, so
   there is always one spare byte at s->outptr for the NUL that the callers
   store there.

   Basically an optimised version of g_byte_array_append().

   This is a single statement (do { } while (0)), so it is safe as the body
   of an unbraced if/else.  The arguments are expanded more than once and
   must therefore have no side effects. */
#define header_append(s, start, inptr)                                                          \
do {                                                                                            \
        register int headerlen = (inptr)-(start);                                               \
                                                                                                \
        if (headerlen > 0) {                                                                    \
                if (headerlen >= ((s)->outend - (s)->outptr)) {                                 \
                        register char *outnew;                                                  \
                        register int outused = (s)->outptr - (s)->outbuf;                       \
                        register int olen = (((s)->outend - (s)->outbuf) + headerlen) * 2 + 1;  \
                        outnew = g_realloc((s)->outbuf, olen);                                  \
                        (s)->outptr = outnew + outused;                                         \
                        (s)->outbuf = outnew;                                                   \
                        (s)->outend = outnew + olen;                                            \
                }                                                                               \
                if ((start)[headerlen-1] == '\r')                                               \
                        headerlen--;                                                            \
                memcpy((s)->outptr, (start), headerlen);                                        \
                (s)->outptr += headerlen;                                                       \
        }                                                                                       \
        if ((s)->header_start == -1)                                                            \
                (s)->header_start = ((start)-(s)->inbuf) + (s)->seek;                           \
} while (0)
1166
/*
 * folder_scan_header:
 * @s: scanner state
 * @lastone: set to FALSE on entry; folder_boundary_check() may change it
 *   when a boundary is met
 *
 * Read header lines from the input into a newly allocated part.  Each line
 * is accumulated in s->outbuf; a line followed by one starting with a space
 * or tab is treated as continued, and a completed header is handed to
 * header_raw_append_parse().
 *
 * Scanning stops at the first of:
 *  - a part boundary found at the start of a line,
 *  - an empty line (nothing accumulated after appending it),
 *  - the input running out, in which case whatever is left is added as a
 *    final, possibly truncated, header.
 *
 * s->atleast is changed while scanning and restored before returning.  The
 * new part is returned in every case; it is not pushed on s->parts here.
 */
static struct _header_scan_stack *
folder_scan_header(struct _header_scan_state *s, int *lastone)
{
        int atleast = s->atleast, newatleast;
        char *start = NULL;
        int len;
        /* NOTE: header_raw_append_parse() expands to code that uses the
           variables named `s' and `h', so neither may be renamed */
        struct _header_scan_stack *h;
        char *inend;
        register char *inptr;

        h(printf("scanning first bit\n"));

        h = g_malloc0(sizeof(*h));
#ifdef MEMPOOL
        h->pool = e_mempool_new(8192, 4096, E_MEMPOOL_ALIGN_STRUCT);
#endif

        /* first pass keeps enough data buffered for the enclosing part's boundary */
        if (s->parts)
                newatleast = s->parts->atleast;
        else
                newatleast = 1;
        *lastone = FALSE;

        /* if the first pass used atleast > 1, a second pass with atleast == 1
           goes over whatever the first one had to leave behind */
        do {
                s->atleast = newatleast;

                h(printf("atleast = %d\n", s->atleast));

                while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
                        inptr = s->inptr;
                        /* scan limit: every position below it still has atleast bytes buffered after it */
                        inend = s->inend-s->atleast+1;

                        while (inptr<inend) {
                                /* boundaries are only looked for at the start of a line */
                                if (!s->midline) {
                                        if (folder_boundary_check(s, inptr, lastone)) {
                                                if ((s->outptr>s->outbuf))
                                                        goto header_truncated; /* may not actually be truncated */

                                                goto header_done;
                                        }
                                }

                                start = inptr;

                                /* goto next line/sentinel; no bounds check, this stops at
                                   the '\n' that folder_read() stores at s->inend[0] */
                                while ((*inptr++)!='\n')
                                        ;

                                g_assert(inptr<=s->inend+1);

                                /* check for sentinel or real end of line */
                                if (inptr > inend) {
                                        h(printf("not at end of line yet, going further\n"));
                                        /* didn't find end of line within our allowed area */
                                        inptr = inend;
                                        s->midline = TRUE;
                                        header_append(s, start, inptr);
                                } else {
                                        h(printf("got line part: '%.*s'\n", inptr-1-start, start));
                                        /* got a line, strip and add it, process it */
                                        s->midline = FALSE;
#ifdef PRESERVE_HEADERS
                                        header_append(s, start, inptr);
#else
                                        header_append(s, start, inptr-1);
#endif
                                        /* check for end of headers */
                                        if (s->outbuf == s->outptr)
                                                goto header_done;

                                        /* check for continuation/compress headers, we have at least 1 char here to work with */
                                        if (inptr[0] ==  ' ' || inptr[0] == '\t') {
                                                h(printf("continuation\n"));
#ifndef PRESERVE_HEADERS
                                                /* TODO: this won't catch multiple space continuation across a read boundary, but
                                                   that is assumed rare, and not fatal anyway */
                                                /* skip the run of leading whitespace, then back up one
                                                   and overwrite it so the run becomes a single space */
                                                do
                                                        inptr++;
                                                while (*inptr == ' ' || *inptr == '\t');
                                                inptr--;
                                                *inptr = ' ';
#endif
                                        } else {
                                                /* otherwise, complete header, add it */
#ifdef PRESERVE_HEADERS
                                                s->outptr--;
                                                if (s->outptr[-1] == '\r')
                                                        s->outptr--;
#endif
                                                s->outptr[0] = 0;

                                                h(printf("header '%s' at %d\n", s->outbuf, (int)s->header_start));

                                                header_raw_append_parse(&h->headers, s->outbuf, s->header_start);
                                                /* start accumulating the next header from scratch */
                                                s->outptr = s->outbuf;
                                                s->header_start = -1;
                                        }
                                }
                        }
                        s->inptr = inptr;
                }
                h(printf("end of file?  read %d bytes\n", len));
                newatleast = 1;
        } while (s->atleast > 1);

        /* out of input: flush a partly accumulated header and/or the unread
           remainder of the buffer, minus one trailing newline */
        if ((s->outptr > s->outbuf) || s->inend > s->inptr) {
                start = s->inptr;
                inptr = s->inend;
                if (inptr > start) {
                        if (inptr[-1] == '\n')
                                inptr--;
                }
                goto header_truncated;
        }

        s->atleast = atleast;

        return h;

header_truncated:
        /* add start..inptr to whatever has been accumulated and store the
           result as a header, unless that leaves nothing at all */
        header_append(s, start, inptr);

        s->outptr[0] = 0;
        if (s->outbuf == s->outptr)
                goto header_done;

        header_raw_append_parse(&h->headers, s->outbuf, s->header_start);

        s->outptr = s->outbuf;
header_done:
        /* commit the scan position and restore the caller's atleast */
        s->inptr = inptr;
        s->atleast = atleast;
        s->header_start = -1;
        return h;
}
1302
/*
 * folder_scan_content:
 * @s: scanner state
 * @lastone: set to FALSE on entry; folder_boundary_check() may change it
 *   when a boundary is met
 * @data: receives a pointer to the start of the content found; it points
 *   into the scanner's input buffer, nothing is copied
 * @length: receives the number of content bytes at *@data
 *
 * Scan forward over body content, line by line, until a part boundary is
 * found at the start of a line or the buffered data has been used up.
 *
 * Returns:
 *  - NULL with *@length == 0 when there is no input left at all;
 *  - the part whose boundary was matched, when the boundary sits at the
 *    start of the scanned data or at most one byte into it;
 *  - otherwise s->parts, or NULL when scanning in From mode at end of input
 *    with s->atleast <= 1, which is treated as a final boundary.
 *
 * When content is returned because a boundary follows it, the byte directly
 * before the boundary (the closing newline) is left out of *@length.
 * s->atleast is changed while scanning and restored before returning.
 */
static struct _header_scan_stack *
folder_scan_content(struct _header_scan_state *s, int *lastone, char **data, size_t *length)
{
        int atleast = s->atleast, newatleast;
        register char *inptr;
        char *inend;
        char *start;
        int len;
        struct _header_scan_stack *part;
        int onboundary = FALSE;

        c(printf("scanning content\n"));

        /* first pass keeps enough data buffered for the current part's boundary */
        part = s->parts;
        if (part)
                newatleast = part->atleast;
        else
                newatleast = 1;
        *lastone = FALSE;

        c(printf("atleast = %d\n", newatleast));

        /* if the first pass used atleast > 1, a second pass with atleast == 1
           goes over whatever the first one had to leave behind */
        do {
                s->atleast = newatleast;

                while ((len = folder_read(s))>0 && len >= s->atleast) { /* ensure we have at least enough room here */
                        inptr = s->inptr;
                        /* at end of input everything buffered may be scanned; otherwise stop
                           early enough that atleast bytes remain after any line start */
                        if (s->eof)
                                inend = s->inend;
                        else
                                inend = s->inend-s->atleast+1;
                        start = inptr;

                        c(printf("inptr = %p, inend = %p\n", inptr, inend));

                        while (inptr<inend) {
                                /* boundaries are only looked for at the start of a line */
                                if (!s->midline
                                    && (part = folder_boundary_check(s, inptr, lastone))) {
                                        onboundary = TRUE;

                                        /* since we truncate the boundary data, we need at least 1 char here spare,
                                           to remain in the same state */
                                        if ( (inptr-start) > 1)
                                                goto content;

                                        /* otherwise, jump to the state of the boundary we actually found */
                                        goto normal_exit;
                                }

                                /* goto the next line; no bounds check, this stops at the
                                   '\n' that folder_read() stores at s->inend[0] */
                                while ((*inptr++)!='\n')
                                        ;

                                /* check the sentinel, if we went past the atleast limit, and reset it to there */
                                if (inptr > inend) {
                                        s->midline = TRUE;
                                        inptr = inend;
                                } else {
                                        s->midline = FALSE;
                                }
                        }

                        c(printf("ran out of input, dumping what i have (%d) bytes midline = %s\n",
                                inptr-start, s->midline?"TRUE":"FALSE"));
                        /* one buffer's worth is handed back per call */
                        goto content;
                }
                newatleast = 1;
        } while (s->atleast > 1);

        c(printf("length read = %d\n", len));

        /* the loops could not proceed, but some bytes are still buffered: return them */
        if (s->inend > s->inptr) {
                start = s->inptr;
                inptr = s->inend;
                goto content;
        }

        /* nothing left at all */
        *length = 0;
        *data = s->inptr;
        s->atleast = atleast;
        return NULL;

content:
        /* treat eof as the last boundary in From mode */
        if (s->scan_from && s->eof && s->atleast <= 1) {
                onboundary = TRUE;
                part = NULL;
        } else {
                part = s->parts;
        }
normal_exit:
        /* commit the scan position and restore the caller's atleast */
        s->atleast = atleast;
        s->inptr = inptr;

        *data = start;
        /* if we hit a boundary, we should not include the closing \n */
        if (onboundary && (inptr-start)>0)
                *length = inptr-start-1;
        else
                *length = inptr-start;

        /*printf("got %scontent: '%.*s'\n", s->midline?"partial ":"", inptr-start, start);*/

        return part;
}
1408
1409
1410 static void
1411 folder_scan_close(struct _header_scan_state *s)
1412 {
1413         g_free(s->realbuf);
1414         g_free(s->outbuf);
1415         while (s->parts)
1416                 folder_pull_part(s);
1417         if (s->fd != -1)
1418                 close(s->fd);
1419         if (s->stream) {
1420                 camel_object_unref((CamelObject *)s->stream);
1421         }
1422         g_free(s);
1423 }
1424
1425
1426 static struct _header_scan_state *
1427 folder_scan_init(void)
1428 {
1429         struct _header_scan_state *s;
1430
1431         s = g_malloc(sizeof(*s));
1432
1433         s->fd = -1;
1434         s->stream = NULL;
1435         s->ioerrno = 0;
1436
1437         s->outbuf = g_malloc(1024);
1438         s->outptr = s->outbuf;
1439         s->outend = s->outbuf+1024;
1440
1441         s->realbuf = g_malloc(SCAN_BUF + SCAN_HEAD*2);
1442         s->inbuf = s->realbuf + SCAN_HEAD;
1443         s->inptr = s->inbuf;
1444         s->inend = s->inbuf;
1445         s->atleast = 0;
1446
1447         s->seek = 0;            /* current character position in file of the last read block */
1448         s->unstep = 0;
1449
1450         s->header_start = -1;
1451
1452         s->start_of_from = -1;
1453         s->start_of_headers = -1;
1454         s->start_of_boundary = -1;
1455
1456         s->midline = FALSE;
1457         s->scan_from = FALSE;
1458         s->scan_pre_from = FALSE;
1459         s->eof = FALSE;
1460
1461         s->filters = NULL;
1462         s->filterid = 1;
1463
1464         s->parts = NULL;
1465
1466         s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1467         return s;
1468 }
1469
1470 static void
1471 drop_states(struct _header_scan_state *s)
1472 {
1473         while (s->parts) {
1474                 folder_scan_drop_step(s);
1475         }
1476         s->unstep = 0;
1477         s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1478 }
1479
1480 static void
1481 folder_scan_reset(struct _header_scan_state *s)
1482 {
1483         drop_states(s);
1484         s->inend = s->inbuf;
1485         s->inptr = s->inbuf;
1486         s->inend[0] = '\n';
1487         if (s->fd != -1) {
1488                 close(s->fd);
1489                 s->fd = -1;
1490         }
1491         if (s->stream) {
1492                 camel_object_unref((CamelObject *)s->stream);
1493                 s->stream = NULL;
1494         }
1495         s->ioerrno = 0;
1496         s->eof = FALSE;
1497 }
1498
1499 static int
1500 folder_scan_init_with_fd(struct _header_scan_state *s, int fd)
1501 {
1502         folder_scan_reset(s);
1503         s->fd = fd;
1504
1505         return 0;
1506 }
1507
1508 static int
1509 folder_scan_init_with_stream(struct _header_scan_state *s, CamelStream *stream)
1510 {
1511         folder_scan_reset(s);
1512         s->stream = stream;
1513         camel_object_ref((CamelObject *)stream);
1514
1515         return 0;
1516 }
1517
1518 #define USE_FROM
1519
1520 static void
1521 folder_scan_step(struct _header_scan_state *s, char **databuffer, size_t *datalength)
1522 {
1523         struct _header_scan_stack *h, *hb;
1524         const char *content;
1525         const char *bound;
1526         int type, state, seenlast;
1527         CamelContentType *ct = NULL;
1528         struct _header_scan_filter *f;
1529         size_t presize;
1530
1531 /*      printf("\nSCAN PASS: state = %d '%s'\n", s->state, states[s->state]);*/
1532
1533 tail_recurse:
1534         d({
1535                 printf("\nSCAN STACK:\n");
1536                 printf(" '%s' :\n", states[s->state]);
1537                 hb = s->parts;
1538                 while (hb) {
1539                         printf("  '%s' : %s ", states[hb->savestate], hb->boundary);
1540                         if (hb->content_type) {
1541                                 printf("(%s/%s)", hb->content_type->type, hb->content_type->subtype);
1542                         } else {
1543                                 printf("(default)");
1544                         }
1545                         printf("\n");
1546                         hb = hb->parent;
1547                 }
1548                 printf("\n");
1549         });
1550
1551         switch (s->state) {
1552
1553 #ifdef USE_FROM
1554         case CAMEL_MIME_PARSER_STATE_INITIAL:
1555                 if (s->scan_from) {
1556                         h = g_malloc0(sizeof(*h));
1557                         h->boundary = g_strdup("From ");
1558                         h->boundarylen = strlen(h->boundary);
1559                         h->boundarylenfinal = h->boundarylen;
1560                         h->from_line = g_byte_array_new();
1561                         folder_push_part(s, h);
1562                         s->state = CAMEL_MIME_PARSER_STATE_PRE_FROM;
1563                 } else {
1564                         s->start_of_from = -1;
1565                         goto scan_header;
1566                 }
1567
1568         case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1569
1570                 h = s->parts;
1571                 do {
1572                         hb = folder_scan_content(s, &state, databuffer, datalength);
1573                         if (s->scan_pre_from && *datalength > 0) {
1574                                 d(printf("got pre-from content %d bytes\n", *datalength));
1575                                 return;
1576                         }
1577                 } while (hb==h && *datalength>0);
1578
1579                 if (*datalength==0 && hb==h) {
1580                         d(printf("found 'From '\n"));
1581                         s->start_of_from = folder_tell(s);
1582                         folder_scan_skip_line(s, h->from_line);
1583                         h->savestate = CAMEL_MIME_PARSER_STATE_INITIAL;
1584                         s->state = CAMEL_MIME_PARSER_STATE_FROM;
1585                 } else {
1586                         folder_pull_part(s);
1587                         s->state = CAMEL_MIME_PARSER_STATE_EOF;
1588                 }
1589                 return;
1590 #else
1591         case CAMEL_MIME_PARSER_STATE_INITIAL:
1592         case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1593 #endif /* !USE_FROM */
1594
1595         scan_header:
1596         case CAMEL_MIME_PARSER_STATE_FROM:
1597                 s->start_of_headers = folder_tell(s);
1598                 h = folder_scan_header(s, &state);
1599 #ifdef USE_FROM
1600                 if (s->scan_from)
1601                         h->savestate = CAMEL_MIME_PARSER_STATE_FROM_END;
1602                 else
1603 #endif
1604                         h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
1605
1606                 /* FIXME: should this check for MIME-Version: 1.0 as well? */
1607
1608                 type = CAMEL_MIME_PARSER_STATE_HEADER;
1609                 if ( (content = camel_header_raw_find(&h->headers, "Content-Type", NULL))
1610                      && (ct = camel_content_type_decode(content))) {
1611                         if (!g_ascii_strcasecmp(ct->type, "multipart")) {
1612                                 if (!camel_content_type_is(ct, "multipart", "signed")
1613                                     && (bound = camel_content_type_param(ct, "boundary"))) {
1614                                         d(printf("multipart, boundary = %s\n", bound));
1615                                         h->boundarylen = strlen(bound)+2;
1616                                         h->boundarylenfinal = h->boundarylen+2;
1617                                         h->boundary = g_malloc(h->boundarylen+3);
1618                                         sprintf(h->boundary, "--%s--", bound);
1619                                         type = CAMEL_MIME_PARSER_STATE_MULTIPART;
1620                                 } else {
1621                                         /*camel_content_type_unref(ct);
1622                                           ct = camel_content_type_decode("text/plain");*/
1623 /* We can't quite do this, as it will mess up all the offsets ... */
1624 /*                                      camel_header_raw_replace(&h->headers, "Content-Type", "text/plain", offset);*/
1625                                         /*g_warning("Multipart with no boundary, treating as text/plain");*/
1626                                 }
1627                         } else if (!g_ascii_strcasecmp(ct->type, "message")) {
1628                                 if (!g_ascii_strcasecmp(ct->subtype, "rfc822")
1629                                     || !g_ascii_strcasecmp(ct->subtype, "news")
1630                                     /*|| !g_ascii_strcasecmp(ct->subtype, "partial")*/) {
1631                                         type = CAMEL_MIME_PARSER_STATE_MESSAGE;
1632                                 }
1633                         }
1634                 } else {
1635                         /* make the default type for multipart/digest be message/rfc822 */
1636                         if ((s->parts
1637                              && camel_content_type_is(s->parts->content_type, "multipart", "digest"))) {
1638                                 ct = camel_content_type_decode("message/rfc822");
1639                                 type = CAMEL_MIME_PARSER_STATE_MESSAGE;
1640                                 d(printf("parent was multipart/digest, autoupgrading to message/rfc822?\n"));
1641                                 /* maybe we should do this too?
1642                                    header_raw_append_parse(&h->headers, "Content-Type: message/rfc822", -1);*/
1643                         } else {
1644                                 ct = camel_content_type_decode("text/plain");
1645                         }
1646                 }
1647                 h->content_type = ct;
1648                 folder_push_part(s, h);
1649                 s->state = type;
1650                 return;
1651                 
1652         case CAMEL_MIME_PARSER_STATE_HEADER:
1653                 s->state = CAMEL_MIME_PARSER_STATE_BODY;
1654                 
1655         case CAMEL_MIME_PARSER_STATE_BODY:
1656                 h = s->parts;
1657                 *datalength = 0;
1658                 presize = SCAN_HEAD;
1659                 f = s->filters;
1660                 
1661                 do {
1662                         hb = folder_scan_content (s, &state, databuffer, datalength);
1663
1664                         d(printf ("\n\nOriginal content: '"));
1665                         d(fwrite(*databuffer, sizeof(char), *datalength, stdout));
1666                         d(printf("'\n"));
1667
1668                         if (*datalength > 0) {
1669                                 while (f) {
1670                                         camel_mime_filter_filter(f->filter, *databuffer, *datalength, presize,
1671                                                                  databuffer, datalength, &presize);
1672                                         d(printf("Filtered content (%s): '", ((CamelObject *)f->filter)->klass->name));
1673                                         d(fwrite(*databuffer, sizeof(char), *datalength, stdout));
1674                                         d(printf("'\n"));
1675                                         f = f->next;
1676                                 }
1677                                 return;
1678                         }
1679                 } while (hb == h && *datalength > 0);
1680                 
1681                 /* check for any filter completion data */
1682                 while (f) {
1683                         camel_mime_filter_complete(f->filter, *databuffer, *datalength, presize,
1684                                                    databuffer, datalength, &presize);
1685                         f = f->next;
1686                 }
1687
1688                 if (*datalength > 0)
1689                         return;
1690                 
1691                 s->state = CAMEL_MIME_PARSER_STATE_BODY_END;
1692                 break;
1693                 
1694         case CAMEL_MIME_PARSER_STATE_MULTIPART:
1695                 h = s->parts;
1696                 /* This mess looks for the next boundary on this
1697                 level.  Once it finds the last one, it keeps going,
1698                 looking for post-multipart content ('postface').
1699                 Because messages might have duplicate boundaries for
1700                 different parts, it makes sure it stops if its already
1701                 found an end boundary for this part.  It handles
1702                 truncated and missing boundaries appropriately too. */
1703                 seenlast = FALSE;
1704                 do {
1705                         do {
1706                                 hb = folder_scan_content(s, &state, databuffer, datalength);
1707                                 if (*datalength>0) {
1708                                         /* instead of a new state, we'll just store it locally and provide
1709                                            an accessor function */
1710                                         d(printf("Multipart %s Content %p: '%.*s'\n",
1711                                                  h->prestage>0?"post":"pre", h, *datalength, *databuffer));
1712                                         if (h->prestage > 0) {
1713                                                 if (h->posttext == NULL)
1714                                                         h->posttext = g_byte_array_new();
1715                                                 g_byte_array_append(h->posttext, *databuffer, *datalength);
1716                                         } else {
1717                                                 if (h->pretext == NULL)
1718                                                         h->pretext = g_byte_array_new();
1719                                                 g_byte_array_append(h->pretext, *databuffer, *datalength);
1720                                         }
1721                                 }
1722                         } while (hb==h && *datalength>0);
1723                         h->prestage++;
1724                         if (*datalength==0 && hb==h && !seenlast) {
1725                                 d(printf("got boundary: %s last=%d\n", hb->boundary, state));
1726                                 s->start_of_boundary = folder_tell(s);
1727                                 folder_scan_skip_line(s, NULL);
1728                                 if (!state) {
1729                                         s->state = CAMEL_MIME_PARSER_STATE_FROM;
1730                                         folder_scan_step(s, databuffer, datalength);
1731                                         s->parts->savestate = CAMEL_MIME_PARSER_STATE_MULTIPART; /* set return state for the new head part */
1732                                         return;
1733                                 } else
1734                                         seenlast = TRUE;
1735                         } else {
1736                                 break;
1737                         }
1738                 } while (1);
1739
1740                 s->state = CAMEL_MIME_PARSER_STATE_MULTIPART_END;
1741                 break;
1742
1743         case CAMEL_MIME_PARSER_STATE_MESSAGE:
1744                 s->state = CAMEL_MIME_PARSER_STATE_FROM;
1745                 folder_scan_step(s, databuffer, datalength);
1746                 s->parts->savestate = CAMEL_MIME_PARSER_STATE_MESSAGE_END;
1747                 break;
1748
1749         case CAMEL_MIME_PARSER_STATE_FROM_END:
1750         case CAMEL_MIME_PARSER_STATE_BODY_END:
1751         case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
1752         case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
1753                 s->state = s->parts->savestate;
1754                 folder_pull_part(s);
1755                 if (s->state & CAMEL_MIME_PARSER_STATE_END)
1756                         return;
1757                 goto tail_recurse;
1758
1759         case CAMEL_MIME_PARSER_STATE_EOF:
1760                 return;
1761
1762         default:
1763                 g_warning ("Invalid state in camel-mime-parser: %u", s->state);
1764                 break;
1765         }
1766
1767         return;
1768 }
1769
1770 /* drops the current state back one */
1771 static void
1772 folder_scan_drop_step(struct _header_scan_state *s)
1773 {
1774         switch (s->state) {
1775         case CAMEL_MIME_PARSER_STATE_EOF:
1776                 s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1777         case CAMEL_MIME_PARSER_STATE_INITIAL:
1778                 return;
1779
1780         case CAMEL_MIME_PARSER_STATE_FROM:
1781         case CAMEL_MIME_PARSER_STATE_PRE_FROM:
1782                 s->state = CAMEL_MIME_PARSER_STATE_INITIAL;
1783                 folder_pull_part(s);
1784                 return;
1785
1786         case CAMEL_MIME_PARSER_STATE_MESSAGE:
1787         case CAMEL_MIME_PARSER_STATE_HEADER:
1788         case CAMEL_MIME_PARSER_STATE_MULTIPART:
1789
1790         case CAMEL_MIME_PARSER_STATE_FROM_END:
1791         case CAMEL_MIME_PARSER_STATE_BODY_END:
1792         case CAMEL_MIME_PARSER_STATE_MULTIPART_END:
1793         case CAMEL_MIME_PARSER_STATE_MESSAGE_END:
1794
1795                 s->state = s->parts->savestate;
1796                 folder_pull_part(s);
1797                 if (s->state & CAMEL_MIME_PARSER_STATE_END) {
1798                         s->state &= ~CAMEL_MIME_PARSER_STATE_END;
1799                 }
1800                 return;
1801         default:
1802                 /* FIXME: not sure if this is entirely right */
1803                 break;
1804         }
1805 }
1806
1807 #ifdef STANDALONE
1808 int main(int argc, char **argv)
1809 {
1810         int fd;
1811         struct _header_scan_state *s;
1812         char *data;
1813         size_t len;
1814         int state;
1815         char *name = "/tmp/evmail/Inbox";
1816         struct _header_scan_stack *h;
1817         int i;
1818         int attach = 0;
1819
1820         if (argc==2)
1821                 name = argv[1];
1822
1823         printf("opening: %s", name);
1824
1825         for (i=1;i<argc;i++) {
1826                 const char *encoding = NULL, *charset = NULL;
1827                 char *attachname;
1828
1829                 name = argv[i];
1830                 printf("opening: %s", name);
1831                 
1832                 fd = g_open(name, O_RDONLY|O_BINARY, 0);
1833                 if (fd==-1) {
1834                         perror("Cannot open mailbox");
1835                         exit(1);
1836                 }
1837                 s = folder_scan_init();
1838                 folder_scan_init_with_fd(s, fd);
1839                 s->scan_from = FALSE;
1840 #if 0
1841                 h = g_malloc0(sizeof(*h));
1842                 h->savestate = CAMEL_MIME_PARSER_STATE_EOF;
1843                 folder_push_part(s, h);
1844 #endif  
1845                 while (s->state != CAMEL_MIME_PARSER_STATE_EOF) {
1846                         folder_scan_step(s, &data, &len);
1847                         printf("\n -- PARSER STEP RETURN -- %d '%s'\n\n", s->state, states[s->state]);
1848                         switch (s->state) {
1849                         case CAMEL_MIME_PARSER_STATE_HEADER:
1850                                 if (s->parts->content_type
1851                                     && (charset = camel_content_type_param(s->parts->content_type, "charset"))) {
1852                                         if (g_ascii_strcasecmp(charset, "us-ascii")) {
1853 #if 0
1854                                                 folder_push_filter_charset(s, "UTF-8", charset);
1855 #endif
1856                                         } else {
1857                                                 charset = NULL;
1858                                         }
1859                                 } else {
1860                                         charset = NULL;
1861                                 }
1862
1863                                 encoding = camel_header_raw_find(&s->parts->headers, "Content-transfer-encoding", 0);
1864                                 printf("encoding = '%s'\n", encoding);
1865                                 if (encoding && !g_ascii_strncasecmp(encoding, " base64", 7)) {
1866                                         printf("adding base64 filter\n");
1867                                         attachname = g_strdup_printf("attach.%d.%d", i, attach++);
1868 #if 0
1869                                         folder_push_filter_save(s, attachname);
1870 #endif
1871                                         g_free(attachname);
1872 #if 0
1873                                         folder_push_filter_mime(s, 0);
1874 #endif
1875                                 }
1876                                 if (encoding && !g_ascii_strncasecmp(encoding, " quoted-printable", 17)) {
1877                                         printf("adding quoted-printable filter\n");
1878                                         attachname = g_strdup_printf("attach.%d.%d", i, attach++);
1879 #if 0
1880                                         folder_push_filter_save(s, attachname);
1881 #endif
1882                                         g_free(attachname);
1883 #if 0
1884                                         folder_push_filter_mime(s, 1);
1885 #endif
1886                                 }
1887
1888                                 break;
1889                         case CAMEL_MIME_PARSER_STATE_BODY:
1890                                 printf("got body %d '%.*s'\n",  len, len, data);
1891                                 break;
1892                         case CAMEL_MIME_PARSER_STATE_BODY_END:
1893                                 printf("end body %d '%.*s'\n",  len, len, data);
1894                                 if (encoding && !g_ascii_strncasecmp(encoding, " base64", 7)) {
1895                                         printf("removing filters\n");
1896 #if 0
1897                                         folder_filter_pull(s);
1898                                         folder_filter_pull(s);
1899 #endif
1900                                 }
1901                                 if (encoding && !g_ascii_strncasecmp(encoding, " quoted-printable", 17)) {
1902                                         printf("removing filters\n");
1903 #if 0
1904                                         folder_filter_pull(s);
1905                                         folder_filter_pull(s);
1906 #endif
1907                                 }
1908                                 if (charset) {
1909 #if 0
1910                                         folder_filter_pull(s);
1911 #endif
1912                                         charset = NULL;
1913                                 }
1914                                 encoding = NULL;
1915                                 break;
1916                         default:
1917                                 break;
1918                         }
1919                 }
1920                 folder_scan_close(s);
1921                 close(fd);
1922         }
1923         return 0;
1924 }
1925
1926 #endif /* STANDALONE */
1927