Revert my previous changes to this function.
authorJeffrey Stedfast <fejj@ximian.com>
Wed, 23 Jul 2003 17:21:57 +0000 (17:21 +0000)
committerJeffrey Stedfast <fejj@src.gnome.org>
Wed, 23 Jul 2003 17:21:57 +0000 (17:21 +0000)
2003-07-23  Jeffrey Stedfast  <fejj@ximian.com>

* camel-mime-message.c (find_best_encoding): Revert my previous
changes to this function.

* camel-mime-utils.h: Revert previous changes.

* camel-mime-part.h: Revert previous changes.

* camel-mime-part-utils.c: Revert previous changes.

* camel-data-wrapper.c (camel_data_wrapper_init): Revert previous
changes.
(write_to_stream): Revert previous changes.

camel/ChangeLog
camel/camel-data-wrapper.c
camel/camel-data-wrapper.h
camel/camel-mime-message.c
camel/camel-mime-part-utils.c
camel/camel-mime-part.c
camel/camel-mime-part.h
camel/camel-mime-utils.h

index 50d88a7..f33507b 100644 (file)
@@ -1,3 +1,18 @@
+2003-07-23  Jeffrey Stedfast  <fejj@ximian.com>
+
+       * camel-mime-message.c (find_best_encoding): Revert my previous
+       changes to this function.
+
+       * camel-mime-utils.h: Revert previous changes.
+
+       * camel-mime-part.h: Revert previous changes.
+
+       * camel-mime-part-utils.c: Revert previous changes.
+
+       * camel-data-wrapper.c (camel_data_wrapper_init): Revert previous
+       changes.
+       (write_to_stream): Revert previous changes.
+
 2003-07-23  Dan Winship  <danw@ximian.com>
 
        * camel-block-file.c: #include camel-file-utils.h for camel_read()
index ba898e0..de4d1bf 100644 (file)
@@ -28,9 +28,8 @@
 #include <errno.h>
 
 #include "camel-data-wrapper.h"
+#include "camel-mime-utils.h"
 #include "camel-stream.h"
-#include "camel-stream-filter.h"
-#include "camel-mime-filter-basic.h"
 #include "camel-exception.h"
 #include "camel-private.h"
 
@@ -41,9 +40,9 @@ static CamelObjectClass *parent_class = NULL;
 /* Returns the class for a CamelDataWrapper */
 #define CDW_CLASS(so) CAMEL_DATA_WRAPPER_CLASS (CAMEL_OBJECT_GET_CLASS(so))
 
-static int construct_from_stream(CamelDataWrapper *, CamelStream *);
+static int construct_from_stream (CamelDataWrapper *, CamelStream *);
 static int write_to_stream (CamelDataWrapper *data_wrapper, CamelStream *stream);
-static void set_mime_type (CamelDataWrapper *data_wrapper, const gchar *mime_type);
+static void set_mime_type (CamelDataWrapper *data_wrapper, const char *mime_type);
 static gchar *get_mime_type (CamelDataWrapper *data_wrapper);
 static CamelContentType *get_mime_type_field (CamelDataWrapper *data_wrapper);
 static void set_mime_type_field (CamelDataWrapper *data_wrapper, CamelContentType *mime_type);
@@ -73,8 +72,8 @@ camel_data_wrapper_init (gpointer object, gpointer klass)
        pthread_mutex_init (&camel_data_wrapper->priv->stream_lock, NULL);
        
        camel_data_wrapper->mime_type = header_content_type_new ("application", "octet-stream");
-       camel_data_wrapper->encoding = CAMEL_MIME_PART_ENCODING_DEFAULT;
        camel_data_wrapper->offline = FALSE;
+       camel_data_wrapper->rawtext = TRUE;
 }
 
 static void
@@ -115,8 +114,6 @@ camel_data_wrapper_get_type (void)
 static int
 write_to_stream (CamelDataWrapper *data_wrapper, CamelStream *stream)
 {
-       CamelMimeFilter *filter;
-       CamelStream *fstream;
        int ret;
        
        if (data_wrapper->stream == NULL) {
@@ -129,30 +126,7 @@ write_to_stream (CamelDataWrapper *data_wrapper, CamelStream *stream)
                return -1;
        }
        
-       fstream = (CamelStream *) camel_stream_filter_new_with_stream (data_wrapper->stream);
-       
-       switch (data_wrapper->encoding) {
-       case CAMEL_MIME_PART_ENCODING_BASE64:
-               filter = (CamelMimeFilter *) camel_mime_filter_basic_new_type (CAMEL_MIME_FILTER_BASIC_BASE64_DEC);
-               camel_stream_filter_add (CAMEL_STREAM_FILTER (fstream), filter);
-               camel_object_unref (filter);
-               break;
-       case CAMEL_MIME_PART_ENCODING_QUOTEDPRINTABLE:
-               filter = (CamelMimeFilter *) camel_mime_filter_basic_new_type (CAMEL_MIME_FILTER_BASIC_QP_DEC);
-               camel_stream_filter_add (CAMEL_STREAM_FILTER (fstream), filter);
-               camel_object_unref (filter);
-               break;
-       case CAMEL_MIME_PART_ENCODING_UUENCODE:
-               filter = (CamelMimeFilter *) camel_mime_filter_basic_new_type (CAMEL_MIME_FILTER_BASIC_UU_DEC);
-               camel_stream_filter_add (CAMEL_STREAM_FILTER (fstream), filter);
-               camel_object_unref (filter);
-               break;
-       default:
-               break;
-       }
-       
-       ret = camel_stream_write_to_stream (fstream, stream);
-       camel_object_unref (fstream);
+       ret = camel_stream_write_to_stream (data_wrapper->stream, stream);
        
        CAMEL_DATA_WRAPPER_UNLOCK (data_wrapper, stream_lock);
        
index 03fbd01..ca6f7a8 100644 (file)
@@ -33,7 +33,6 @@ extern "C" {
 
 #include <glib.h>
 #include <camel/camel-object.h>
-#include <camel/camel-mime-utils.h>
 
 #define CAMEL_DATA_WRAPPER_TYPE     (camel_data_wrapper_get_type ())
 #define CAMEL_DATA_WRAPPER(obj)     (CAMEL_CHECK_CAST((obj), CAMEL_DATA_WRAPPER_TYPE, CamelDataWrapper))
@@ -44,12 +43,11 @@ struct _CamelDataWrapper {
        CamelObject parent_object;
        struct _CamelDataWrapperPrivate *priv;
        
-       CamelMimePartEncodingType encoding;
-       
        CamelContentType *mime_type;
        CamelStream *stream;
        
        unsigned int offline:1;
+       unsigned int rawtext:1;
 };
 
 typedef struct {
index 0d5824c..88ab643 100644 (file)
@@ -673,17 +673,17 @@ camel_mime_message_has_8bit_parts (CamelMimeMessage *msg)
 static CamelMimePartEncodingType
 find_best_encoding (CamelMimePart *part, CamelBestencRequired required, CamelBestencEncoding enctype, char **charsetp)
 {
-       CamelMimeFilterCharset *charenc = NULL;
-       CamelMimePartEncodingType encoding;
-       CamelMimeFilterBestenc *bestenc;
-       unsigned int flags, callerflags;
-       CamelDataWrapper *content;
-       CamelStreamFilter *filter;
-       const char *charsetin;
+               const char *charsetin = NULL;
        char *charset = NULL;
        CamelStream *null;
+       CamelStreamFilter *filter;
+       CamelMimeFilterCharset *charenc = NULL;
+       CamelMimeFilterBestenc *bestenc;
        int idb, idc = -1;
        gboolean istext;
+       unsigned int flags, callerflags;
+       CamelMimePartEncodingType encoding;
+       CamelDataWrapper *content;
        
        /* we use all these weird stream things so we can do it with streams, and
           not have to read the whole lot into memory - although i have a feeling
@@ -717,10 +717,12 @@ find_best_encoding (CamelMimePart *part, CamelBestencRequired required, CamelBes
        null = (CamelStream *)camel_stream_null_new ();
        filter = camel_stream_filter_new_with_stream (null);
        
-       /* if we're looking for the best charset, then we need to convert to UTF-8 */
-       if (istext && (required & CAMEL_BESTENC_GET_CHARSET) != 0
-           && (charsetin = header_content_type_param (content->mime_type, "charset"))) {
-               charenc = camel_mime_filter_charset_new_convert (charsetin, "UTF-8");
+       /* if we're not looking for the best charset, then use the one we have */
+       if (istext && (required & CAMEL_BESTENC_GET_CHARSET) == 0
+           && (charsetin = header_content_type_param (part->content_type, "charset"))) {
+               /* if libunicode doesn't support it, we dont really have utf8 anyway, so
+                  we dont need a converter */
+               charenc = camel_mime_filter_charset_new_convert ("UTF-8", charsetin);
                if (charenc != NULL)
                        idc = camel_stream_filter_add (filter, (CamelMimeFilter *)charenc);
                charsetin = NULL;
@@ -733,29 +735,31 @@ find_best_encoding (CamelMimePart *part, CamelBestencRequired required, CamelBes
        camel_stream_filter_remove (filter, idb);
        if (idc != -1) {
                camel_stream_filter_remove (filter, idc);
-               camel_object_unref (charenc);
+               camel_object_unref ((CamelObject *)charenc);
                charenc = NULL;
        }
        
-       if (istext && (required & CAMEL_BESTENC_GET_CHARSET) != 0) {
+       if (istext)
                charsetin = camel_mime_filter_bestenc_get_best_charset (bestenc);
-               d(printf("best charset = %s\n", charsetin ? charsetin : "(null)"));
-               charset = g_strdup (charsetin);
-               
-               charsetin = header_content_type_param (content->mime_type, "charset");
-       } else {
-               charset = NULL;
-       }
+       
+       d(printf("charsetin = %s\n", charsetin ? charsetin : "(null)"));
        
        /* if we have US-ASCII, or we're not doing text, we dont need to bother with the rest */
-       if (istext && charsetin && charset && (required & CAMEL_BESTENC_GET_CHARSET) != 0) {
+       if (charsetin != NULL && (required & CAMEL_BESTENC_GET_CHARSET) != 0) {
+               charset = g_strdup (charsetin);
+               
                d(printf("have charset, trying conversion/etc\n"));
                
-               /* now that 'bestenc' has told us what the best encoding is, we can use that to create
+               /* now the 'bestenc' can has told us what the best encoding is, we can use that to create
                   a charset conversion filter as well, and then re-add the bestenc to filter the
                   result to find the best encoding to use as well */
                
-               charenc = camel_mime_filter_charset_new_convert (charsetin, charset);
+               charenc = camel_mime_filter_charset_new_convert ("UTF-8", charset);
+               
+               /* eek, libunicode doesn't undertand this charset anyway, then the 'utf8' we
+                  thought we had is really the native format, in which case, we just treat
+                  it as binary data (and take the result we have so far) */
+               
                if (charenc != NULL) {
                        /* otherwise, try another pass, converting to the real charset */
                        
@@ -769,15 +773,15 @@ find_best_encoding (CamelMimePart *part, CamelBestencRequired required, CamelBes
                        /* and write it to the new stream */
                        camel_data_wrapper_write_to_stream (content, (CamelStream *)filter);
                        
-                       camel_object_unref (charenc);
+                       camel_object_unref ((CamelObject *)charenc);
                }
        }
        
        encoding = camel_mime_filter_bestenc_get_best_encoding (bestenc, enctype);
        
-       camel_object_unref (filter);
-       camel_object_unref (bestenc);
-       camel_object_unref (null);
+       camel_object_unref ((CamelObject *)filter);
+       camel_object_unref ((CamelObject *)bestenc);
+       camel_object_unref ((CamelObject *)null);
        
        d(printf("done, best encoding = %d\n", encoding));
        
index af4d7c1..800f233 100644 (file)
@@ -5,7 +5,7 @@
  *          Michael Zucchi <notzed@ximian.com>
  *          Jeffrey Stedfast <fejj@ximian.com>
  *
- * Copyright 1999-2003 Ximian, Inc. (www.ximian.com)
+ * Copyright 1999, 2000 Ximian, Inc. (www.ximian.com)
  *
  * This program is free software; you can redistribute it and/or 
  * modify it under the terms of version 2 of the GNU General Public 
@@ -34,6 +34,7 @@
 
 #include <gal/util/e-iconv.h>
 
+#include "camel-string-utils.h"
 #include "camel-charset-map.h"
 #include "camel-mime-part-utils.h"
 #include "camel-mime-message.h"
 #define d(x) /*(printf("%s(%d): ", __FILE__, __LINE__),(x))
               #include <stdio.h>*/
 
+/* example: <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1"> */
+
+/*
+ * Scan an HTML document for a tag of the form
+ *   <META http-equiv="Content-Type" content="text/html; charset=ISO-8859-1">
+ * and report the charset it declares.
+ *
+ * buffer/length are handed to the HTML parser as-is; nothing in here undoes
+ * a base64 or quoted-printable transfer encoding first.
+ *
+ * Returns the result of e_iconv_charset_name() for the first charset
+ * parameter found, or NULL if the parser reaches EOF without one.
+ * NOTE(review): the returned string is presumably owned by e-iconv and must
+ * not be freed by the caller - confirm against e_iconv_charset_name().
+ */
+static const char *
+check_html_charset(char *buffer, int length)
+{
+       CamelHTMLParser *parser = camel_html_parser_new();
+       const char *found = NULL;
+       camel_html_parser_t token;
+
+       camel_html_parser_set_data(parser, buffer, length, TRUE);
+
+       for (;;) {
+               struct _header_content_type *ctype;
+               const char *chunk, *tag, *equiv, *content;
+               int chunk_len;
+
+               token = camel_html_parser_step(parser, &chunk, &chunk_len);
+
+               /* only element tokens are inspected; everything else is ignored */
+               if (token == CAMEL_HTML_PARSER_ELEMENT) {
+                       tag = camel_html_parser_tag(parser);
+                       d(printf("Got tag: %s\n", tag));
+
+                       if (strcasecmp(tag, "meta") == 0) {
+                               equiv = camel_html_parser_attr(parser, "http-equiv");
+                               if (equiv != NULL && strcasecmp(equiv, "content-type") == 0) {
+                                       content = camel_html_parser_attr(parser, "content");
+                                       ctype = content ? header_content_type_decode(content) : NULL;
+                                       if (ctype != NULL) {
+                                               /* the name is looked up before ctype is unref'd */
+                                               found = header_content_type_param(ctype, "charset");
+                                               found = e_iconv_charset_name (found);
+                                               header_content_type_unref(ctype);
+                                       }
+                               }
+                       }
+               }
+
+               /* stop at the first usable charset, or when the input is exhausted */
+               if (found != NULL || token == CAMEL_HTML_PARSER_EOF)
+                       break;
+       }
+
+       camel_object_unref (parser);
+
+       return found;
+}
+
+/*
+ * Convert the bytes of 'in' from charset 'from' to charset 'to'.
+ *
+ * Returns a newly allocated GByteArray holding the converted bytes (an empty
+ * array when 'in' is empty), or NULL if no converter could be opened or the
+ * conversion failed with anything other than E2BIG/EINVAL.  'in' itself is
+ * never modified or freed.
+ *
+ * errno is only meaningful after e_iconv() has reported failure, so it is
+ * captured into a local right after a failing call and never consulted
+ * otherwise; a stale E2BIG/EINVAL left behind by earlier code therefore
+ * cannot influence the loop.
+ */
+static GByteArray *
+convert_buffer (GByteArray *in, const char *to, const char *from)
+{
+       size_t inleft, outleft, outlen, converted = 0;
+       GByteArray *out = NULL;
+       const char *inbuf;
+       char *outbuf;
+       iconv_t cd;
+       int err;
+
+       if (in->len == 0)
+               return g_byte_array_new();
+
+       d(printf("converting buffer from %s to %s:\n", from, to));
+       d(fwrite(in->data, 1, (int)in->len, stdout));
+       d(printf("\n"));
+
+       cd = e_iconv_open(to, from);
+       if (cd == (iconv_t) -1) {
+               g_warning ("Cannot convert from '%s' to '%s': %s", from, to, strerror (errno));
+               return NULL;
+       }
+
+       /* initial guess at the output size; grown below whenever iconv reports E2BIG */
+       outlen = in->len * 2 + 16;
+       out = g_byte_array_new ();
+       g_byte_array_set_size (out, outlen);
+
+       inbuf = (const char *) in->data;
+       inleft = in->len;
+
+       do {
+               outbuf = (char *) out->data + converted;
+               outleft = outlen - converted;
+
+               errno = 0;
+               if (e_iconv (cd, &inbuf, &inleft, &outbuf, &outleft) == (size_t) -1)
+                       err = errno;
+               else
+                       err = 0;
+
+               if (err != 0 && err != E2BIG && err != EINVAL)
+                       goto fail;
+
+               converted = outbuf - (char *) out->data;
+
+               /*
+                * E2BIG   There is not sufficient room at *outbuf.
+                *
+                * We just need to grow our outbuffer and try again.
+                */
+               if (err == E2BIG) {
+                       outlen += inleft * 2 + 16;
+                       out = g_byte_array_set_size (out, outlen);
+                       /* the resize may have moved the data; re-anchor the cursor */
+                       outbuf = (char *) out->data + converted;
+                       outleft = outlen - converted;
+               }
+       } while (err == E2BIG && inleft > 0);
+
+       /*
+        * EINVAL  An incomplete multibyte sequence has been encountered
+        *         in the input.
+        *
+        * We'll just have to ignore it...
+        */
+
+       /* flush the iconv conversion */
+       e_iconv (cd, NULL, NULL, &outbuf, &outleft);
+
+       /* now set the true length on the GByteArray */
+       converted = outbuf - (char *) out->data;
+       g_byte_array_set_size (out, converted);
+
+       d(printf("converted data:\n"));
+       d(fwrite(out->data, 1, (int)out->len, stdout));
+       d(printf("\n"));
+
+       e_iconv_close (cd);
+
+       return out;
+
+ fail:
+       /* report the error captured from the failing e_iconv() call */
+       g_warning ("Cannot convert from '%s' to '%s': %s", from, to, strerror (err));
+
+       g_byte_array_free (out, TRUE);
+
+       e_iconv_close (cd);
+
+       return NULL;
+}
+
+/*
+ * Report whether 'buffer' contains any byte in the range 128..159.
+ *
+ * 'charset' is not used for the decision; it only appears in the warning
+ * that is logged when such a byte is found.
+ *
+ * Returns TRUE on the first byte in that range, FALSE if there is none.
+ */
+static gboolean
+broken_windows_charset (GByteArray *buffer, const char *charset)
+{
+       unsigned int pos;
+
+       for (pos = 0; pos < buffer->len; pos++) {
+               unsigned char byte = buffer->data[pos];
+
+               if (byte < 128 || byte > 159)
+                       continue;
+
+               g_warning ("Encountered Windows charset parading as %s", charset);
+               return TRUE;
+       }
+
+       return FALSE;
+}
+
+/*
+ * Returns TRUE when no byte of 'buffer' has its high bit set (i.e. every
+ * byte is <= 127), FALSE as soon as one does.  An empty buffer is 7bit.
+ */
+static gboolean
+is_7bit (GByteArray *buffer)
+{
+       unsigned int pos = 0;
+
+       while (pos < buffer->len) {
+               if (buffer->data[pos] & 0x80)
+                       return FALSE;
+               pos++;
+       }
+
+       return TRUE;
+}
+
+/*
+ * Canonical names indexed by ISO-8859 part number, so that
+ * iso_charsets[N] is "iso-8859-N".  Slot 0 holds "us-ascii".
+ */
+static const char *iso_charsets[] = {
+       "us-ascii",
+       "iso-8859-1",
+       "iso-8859-2",
+       "iso-8859-3",
+       "iso-8859-4",
+       "iso-8859-5",
+       "iso-8859-6",
+       "iso-8859-7",
+       "iso-8859-8",
+       "iso-8859-9",
+       "iso-8859-10",
+       "iso-8859-11",
+       "iso-8859-12",
+       "iso-8859-13",
+       "iso-8859-14",
+       "iso-8859-15",
+       "iso-8859-16"
+};
+
+#define NUM_ISO_CHARSETS (sizeof (iso_charsets) / sizeof (iso_charsets[0]))
+
+/*
+ * Map spelling variants of an ISO-8859 charset name ("ISO8859_1",
+ * "iso_8859-1", "iso-88591", ...) onto the canonical "iso-8859-N" string
+ * from iso_charsets[].
+ *
+ * charset: NUL-terminated charset name; must not be NULL.
+ *
+ * Returns the canonical static string when 'charset' is "iso", an optional
+ * '-' or '_', "8859", an optional '-' or '_', and a decimal part number
+ * that indexes iso_charsets[].  In every other case the 'charset' pointer
+ * itself is returned unchanged.
+ *
+ * NOTE(review): a part number of 0 still maps to slot 0 ("us-ascii"), as it
+ * always has - confirm whether that is intended.
+ */
+static const char *
+canon_charset_name (const char *charset)
+{
+       const char *ptr;
+       char *endptr;
+       unsigned long iso;
+
+       if (strncasecmp (charset, "iso", 3) != 0)
+               return charset;
+
+       ptr = charset + 3;
+       if (*ptr == '-' || *ptr == '_')
+               ptr++;
+
+       /* if it's not an iso-8859-# charset, we don't care about it */
+       if (strncmp (ptr, "8859", 4) != 0)
+               return charset;
+
+       ptr += 4;
+       if (*ptr == '-' || *ptr == '_')
+               ptr++;
+
+       /* strtoul() skips leading whitespace and accepts a sign; a part
+          number has to start with a digit */
+       if (*ptr < '0' || *ptr > '9')
+               return charset;
+
+       /* keep the full unsigned long: narrowing to int could wrap an
+          out-of-range number back into the table */
+       iso = strtoul (ptr, &endptr, 10);
+       if (endptr == ptr || *endptr != '\0')
+               return charset;
+
+       if (iso >= NUM_ISO_CHARSETS)
+               return charset;
+
+       return iso_charsets[iso];
+}
+
 /* simple data wrapper */
 static void
 simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser *mp)
 {
+       CamelMimeFilter *fdec = NULL, *fcrlf = NULL;
+       CamelMimeFilterBasicType enctype = 0;
+       size_t len;
+       int decid = -1, crlfid = -1;
        struct _header_content_type *ct;
        const char *charset = NULL;
        char *encoding, *buf;
        GByteArray *buffer;
        CamelStream *mem;
-       size_t len;
        
        d(printf ("simple_data_wrapper_construct_from_parser()\n"));
        
        /* first, work out conversion, if any, required, we dont care about what we dont know about */
        encoding = header_content_encoding_decode (camel_mime_parser_header (mp, "Content-Transfer-Encoding", NULL));
+       if (encoding) {
+               if (!strcasecmp (encoding, "base64")) {
+                       d(printf("Adding base64 decoder ...\n"));
+                       enctype = CAMEL_MIME_FILTER_BASIC_BASE64_DEC;
+               } else if (!strcasecmp (encoding, "quoted-printable")) {
+                       d(printf("Adding quoted-printable decoder ...\n"));
+                       enctype = CAMEL_MIME_FILTER_BASIC_QP_DEC;
+               } else if (!strcasecmp (encoding, "x-uuencode")) {
+                       d(printf("Adding uudecoder ...\n"));
+                       enctype = CAMEL_MIME_FILTER_BASIC_UU_DEC;
+               }
+               g_free (encoding);
+               
+               if (enctype != 0) {
+                       fdec = (CamelMimeFilter *)camel_mime_filter_basic_new_type(enctype);
+                       decid = camel_mime_parser_filter_add (mp, fdec);
+               }
+       }
+       
+       /* If we're doing text, we also need to do CRLF->LF and may have to convert it to UTF8 as well. */
+       ct = camel_mime_parser_content_type (mp);
+       if (header_content_type_is (ct, "text", "*")) {
+               charset = header_content_type_param (ct, "charset");
+               charset = e_iconv_charset_name (charset);
+               
+               if (fdec) {
+                       d(printf ("Adding CRLF conversion filter\n"));
+                       fcrlf = camel_mime_filter_crlf_new (CAMEL_MIME_FILTER_CRLF_DECODE,
+                                                           CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY);
+                       crlfid = camel_mime_parser_filter_add (mp, fcrlf);
+               }
+       }
        
        /* read in the entire content */
        buffer = g_byte_array_new ();
@@ -76,22 +334,86 @@ simple_data_wrapper_construct_from_parser (CamelDataWrapper *dw, CamelMimeParser
                g_byte_array_append (buffer, buf, len);
        }
        
-       d(printf("message part kept in memory!\n"));
+       /* check for broken Outlook/Web mailers that like to send html marked as text/plain */
+       if (header_content_type_is (ct, "text", "plain")) {
+               register const unsigned char *inptr;
+               const unsigned char *inend;
+               
+               inptr = buffer->data;
+               inend = inptr + buffer->len;
+               
+               while (inptr < inend && isspace ((int) *inptr))
+                       inptr++;
+
+               if (((inend-inptr) > 5 && g_ascii_strncasecmp(inptr, "<html", 5) == 0)
+                   || ((inend-inptr) > 9 && g_ascii_strncasecmp(inptr, "<!doctype", 9) == 0)) {
+                       /* re-tag as text/html */
+                       g_free (ct->subtype);
+                       ct->subtype = g_strdup ("html");
+               }
+       }
        
-       mem = camel_stream_mem_new_with_byte_array (buffer);
-       camel_data_wrapper_construct_from_stream (dw, mem);
-       camel_object_unref (mem);
+       /* Possible Lame Mailer Alert... check the META tags for a charset */
+       if (!charset && header_content_type_is (ct, "text", "html")) {
+               if ((charset = check_html_charset (buffer->data, buffer->len)))
+                       header_content_type_set_param (ct, "charset", charset);
+       }
        
-       if (encoding) {
-               if (!strcasecmp (encoding, "base64")) {
-                       dw->encoding = CAMEL_MIME_PART_ENCODING_BASE64;
-               } else if (!strcasecmp (encoding, "quoted-printable")) {
-                       dw->encoding = CAMEL_MIME_PART_ENCODING_QUOTEDPRINTABLE;
-               } else if (!strcasecmp (encoding, "x-uuencode")) {
-                       dw->encoding = CAMEL_MIME_PART_ENCODING_UUENCODE;
+       /* if we need to do charset conversion, see if we can/it works/etc */
+       if (charset && !(strcasecmp (charset, "us-ascii") == 0
+                        || strcasecmp (charset, "utf-8") == 0
+                        || strncasecmp (charset, "x-", 2) == 0)) {
+               GByteArray *out;
+               
+               /* You often see Microsoft Windows users announcing their texts
+                * as being in ISO-8859-1 even when in fact they contain funny
+                * characters from the Windows-CP1252 superset.
+                */
+               charset = canon_charset_name (charset);
+               if (!strncasecmp (charset, "iso-8859", 8)) {
+                       /* check for Windows-specific chars... */
+                       if (broken_windows_charset (buffer, charset))
+                               charset = camel_charset_iso_to_windows (charset);
+               }
+               
+               out = convert_buffer (buffer, "UTF-8", charset);
+               if (out) {
+                       /* converted ok, use this data instead */
+                       g_byte_array_free(buffer, TRUE);
+                       dw->rawtext = FALSE;
+                       buffer = out;
+               } else {
+                       /* else failed to convert, leave as raw? */
+                       g_warning("Storing text as raw, unknown charset '%s' or invalid format", charset);
+                       dw->rawtext = TRUE;
+               }
+       } else if (header_content_type_is (ct, "text", "*")) {
+               if (charset == NULL || !strcasecmp (charset, "us-ascii")) {
+                       /* check that it's 7bit */
+                       dw->rawtext = !is_7bit (buffer);
+               } else if (!strncasecmp (charset, "x-", 2)) {
+                       /* we're not even going to bother trying to convert, so set the
+                          rawtext bit to TRUE and let the mailer deal with it. */
+                       dw->rawtext = TRUE;
+               } else if (!strcasecmp (charset, "utf-8") && buffer->len) {
+                       /* check that it is valid utf8 */
+                       dw->rawtext = !g_utf8_validate (buffer->data, buffer->len, NULL);
                }
-               g_free (encoding);
        }
+       
+       d(printf("message part kept in memory!\n"));
+       
+       mem = camel_stream_mem_new_with_byte_array(buffer);
+       camel_data_wrapper_construct_from_stream(dw, mem);
+       camel_object_unref((CamelObject *)mem);
+
+       camel_mime_parser_filter_remove(mp, decid);
+       camel_mime_parser_filter_remove(mp, crlfid);
+       
+       if (fdec)
+               camel_object_unref((CamelObject *)fdec);
+       if (fcrlf)
+               camel_object_unref((CamelObject *)fcrlf);
 }
 
 /* This replaces the data wrapper repository ... and/or could be replaced by it? */
@@ -102,7 +424,7 @@ camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParse
        CamelContentType *ct;
        
        ct = camel_mime_parser_content_type (mp);
-       
+
        switch (camel_mime_parser_state (mp)) {
        case HSCAN_HEADER:
                d(printf("Creating body part\n"));
@@ -139,6 +461,15 @@ camel_mime_part_construct_content_from_parser (CamelMimePart *dw, CamelMimeParse
                /* would you believe you have to set this BEFORE you set the content object???  oh my god !!!! */
                camel_data_wrapper_set_mime_type_field (content, camel_mime_part_get_content_type (dw));
                camel_medium_set_content_object ((CamelMedium *)dw, content);
+               
+               /* Note: we don't set ct as the content-object's mime-type above because
+                * camel_medium_set_content_object() may re-write the Content-Type header
+                * (see CamelMimePart::set_content_object) if we did that (which is a Bad Thing).
+                * However, if we set it *afterward*, we can still use any special auto-detections
+                * that we found in simple_data_wrapper_construct_from_parser(). This is important
+                * later when we go to render the MIME parts in mail-format.c */
+               camel_data_wrapper_set_mime_type_field (content, ct);
+               
                camel_object_unref (content);
        }
 }
index e6ab427..3432487 100644 (file)
@@ -75,7 +75,7 @@ static CamelMediumClass *parent_class=NULL;
 static int             write_to_stream                 (CamelDataWrapper *data_wrapper, CamelStream *stream);
 static int            construct_from_stream           (CamelDataWrapper *dw, CamelStream *s);
 
-/* from CamelMedium */ 
+/* from CamelMedia */ 
 static void            add_header                      (CamelMedium *medium, const char *header_name, const void *header_value);
 static void            set_header                      (CamelMedium *medium, const char *header_name, const void *header_value);
 static void            remove_header                   (CamelMedium *medium, const char *header_name);
@@ -626,6 +626,7 @@ write_to_stream(CamelDataWrapper *data_wrapper, CamelStream *stream)
 {
        CamelMimePart *mp = CAMEL_MIME_PART(data_wrapper);
        CamelMedium *medium = CAMEL_MEDIUM(data_wrapper);
+       CamelStream *ostream = stream;
        CamelDataWrapper *content;
        int total = 0;
        int count;
@@ -666,75 +667,89 @@ write_to_stream(CamelDataWrapper *data_wrapper, CamelStream *stream)
                return -1;
        total += count;
        
-       content = camel_medium_get_content_object (medium);
+       content = camel_medium_get_content_object(medium);
        if (content) {
-               if (mp->encoding != content->encoding) {
-                       /* we need to (re)encode the content stream */
-                       CamelStream *filter_stream = NULL;
-                       CamelMimeFilter *filter = NULL;
-                       const char *filename;
-                       
-                       filter_stream = (CamelStream *) camel_stream_filter_new_with_stream (stream);
-                       switch (mp->encoding) {
-                       case CAMEL_MIME_PART_ENCODING_BASE64:
-                               filter = (CamelMimeFilter *) camel_mime_filter_basic_new_type (CAMEL_MIME_FILTER_BASIC_BASE64_ENC);
-                               camel_stream_filter_add (CAMEL_STREAM_FILTER (filter_stream), filter);
-                               camel_object_unref (filter);
-                               break;
-                       case CAMEL_MIME_PART_ENCODING_QUOTEDPRINTABLE:
-                               filter = (CamelMimeFilter *) camel_mime_filter_basic_new_type (CAMEL_MIME_FILTER_BASIC_QP_ENC);
-                               camel_stream_filter_add (CAMEL_STREAM_FILTER (filter_stream), filter);
-                               camel_object_unref (filter);
-                               break;
-                       case CAMEL_MIME_PART_ENCODING_UUENCODE:
-                               filename = camel_mime_part_get_filename (mp);
-                               count = camel_stream_printf (stream, "begin 0644 %s\n", filename ? filename : "unknown");
-                               if (count == -1) {
-                                       camel_object_unref (filter_stream);
-                                       return -1;
-                               }
-                               
-                               total += count;
-                               
-                               filter = (CamelMimeFilter *) camel_mime_filter_basic_new_type (CAMEL_MIME_FILTER_BASIC_UU_ENC);
-                               camel_stream_filter_add (CAMEL_STREAM_FILTER (filter_stream), filter);
-                               camel_object_unref (filter);
-                               break;
-                       default:
-                               break;
-                       }
-                       
-                       count = camel_data_wrapper_write_to_stream (content, filter_stream);
-                       camel_stream_flush (filter_stream);
-                       camel_object_unref (filter_stream);
-                       
+               /* I don't really like this here, but I don't know where else it might go ... */
+#define CAN_THIS_GO_ELSEWHERE
+#ifdef CAN_THIS_GO_ELSEWHERE
+               CamelMimeFilter *filter = NULL;
+               CamelStreamFilter *filter_stream = NULL;
+               CamelMimeFilter *charenc = NULL;
+               const char *filename;
+               const char *charset;
+               
+               switch (mp->encoding) {
+               case CAMEL_MIME_PART_ENCODING_BASE64:
+                       filter = (CamelMimeFilter *)camel_mime_filter_basic_new_type(CAMEL_MIME_FILTER_BASIC_BASE64_ENC);
+                       break;
+               case CAMEL_MIME_PART_ENCODING_QUOTEDPRINTABLE:
+                       filter = (CamelMimeFilter *)camel_mime_filter_basic_new_type(CAMEL_MIME_FILTER_BASIC_QP_ENC);
+                       break;
+               case CAMEL_MIME_PART_ENCODING_UUENCODE:
+                       filename = camel_mime_part_get_filename (mp);
+                       count = camel_stream_printf (ostream, "begin 644 %s\n", filename ? filename : "untitled");
                        if (count == -1)
                                return -1;
-                       
                        total += count;
+                       filter = (CamelMimeFilter *)camel_mime_filter_basic_new_type(CAMEL_MIME_FILTER_BASIC_UU_ENC);
+                       break;
+               default:
+                       break;
+               }
+               
+               if (!content->rawtext && header_content_type_is(mp->content_type, "text", "*")) {
+                       charset = header_content_type_param(mp->content_type, "charset");
+                       if (charset && !(!strcasecmp(charset, "us-ascii") || !strcasecmp(charset, "utf-8"))) {
+                               charenc = (CamelMimeFilter *)camel_mime_filter_charset_new_convert("UTF-8", charset);
+                       } 
+               }
+               
+               if (filter || charenc) {
+                       filter_stream = camel_stream_filter_new_with_stream(stream);
                        
-                       if (mp->encoding == CAMEL_MIME_PART_ENCODING_UUENCODE) {
-                               /* FIXME: get rid of this special-case x-uuencode crap */
-                               count = camel_stream_write (stream, "end\n", 4);
-                               if (count == -1)
-                                       return -1;
+                       /* if we have a character encoder, add that always */
+                       if (charenc) {
+                               camel_stream_filter_add(filter_stream, charenc);
+                               camel_object_unref((CamelObject *)charenc);
+                       }
+                       
+                       /* we only re-do crlf on encoded blocks */
+                       if (filter && header_content_type_is(mp->content_type, "text", "*")) {
+                               CamelMimeFilter *crlf = camel_mime_filter_crlf_new(CAMEL_MIME_FILTER_CRLF_ENCODE,
+                                                                                  CAMEL_MIME_FILTER_CRLF_MODE_CRLF_ONLY);
                                
-                               total += count;
+                               camel_stream_filter_add(filter_stream, crlf);
+                               camel_object_unref((CamelObject *)crlf);
                        }
-               } else {
-                       /* write the content out raw... */
-                       if (camel_stream_reset (content->stream) == -1)
-                               return -1;
                        
-                       count = camel_stream_write_to_stream (content->stream, stream);
+                       if (filter) {
+                               camel_stream_filter_add(filter_stream, filter);
+                               camel_object_unref((CamelObject *)filter);
+                       }
                        
+                       stream = (CamelStream *)filter_stream;
+               }
+
+#endif
+               
+               count = camel_data_wrapper_write_to_stream(content, stream);
+               
+               if (filter_stream) {
+                       camel_stream_flush((CamelStream *)filter_stream);
+                       camel_object_unref((CamelObject *)filter_stream);
+               }
+               if (count == -1)
+                       return -1;
+               total += count;
+               
+               if (mp->encoding == CAMEL_MIME_PART_ENCODING_UUENCODE) {
+                       count = camel_stream_write (ostream, "end\n", 4);
                        if (count == -1)
                                return -1;
-                       
                        total += count;
                }
        } else {
-               g_warning ("No content for medium, nothing to write");
+               g_warning("No content for medium, nothing to write");
        }
        
        return total;
index d53b660..c925643 100644 (file)
@@ -31,7 +31,7 @@
 #ifdef __cplusplus
 extern "C" {
 #pragma }
-#endif /* __cplusplus */
+#endif /* __cplusplus }*/
 
 #include <camel/camel-medium.h>
 #include <camel/camel-mime-utils.h>
@@ -42,8 +42,23 @@ extern "C" {
 #define CAMEL_MIME_PART_CLASS(k) (CAMEL_CHECK_CLASS_CAST ((k), CAMEL_MIME_PART_TYPE, CamelMimePartClass))
 #define CAMEL_IS_MIME_PART(o)    (CAMEL_CHECK_TYPE((o), CAMEL_MIME_PART_TYPE))
 
+/* Encoding selectors switched on (as mp->encoding) by write_to_stream() in camel-mime-part.c. Note: if you change this, make sure you change the 'encodings' array in camel-mime-part.c */
+enum _CamelMimePartEncodingType {
+        CAMEL_MIME_PART_ENCODING_DEFAULT, /* DEFAULT, 7BIT, 8BIT and BINARY hit the default: case in write_to_stream(): no encoding filter is added */
+        CAMEL_MIME_PART_ENCODING_7BIT,
+        CAMEL_MIME_PART_ENCODING_8BIT,
+        CAMEL_MIME_PART_ENCODING_BASE64, /* write_to_stream() adds a CAMEL_MIME_FILTER_BASIC_BASE64_ENC filter */
+        CAMEL_MIME_PART_ENCODING_QUOTEDPRINTABLE, /* write_to_stream() adds a CAMEL_MIME_FILTER_BASIC_QP_ENC filter */
+        CAMEL_MIME_PART_ENCODING_BINARY,
+       CAMEL_MIME_PART_ENCODING_UUENCODE, /* write_to_stream() emits "begin 644 <filename>", adds a CAMEL_MIME_FILTER_BASIC_UU_ENC filter, then emits "end" */
+        CAMEL_MIME_PART_NUM_ENCODINGS /* last enumerator: its value equals the number of encodings listed above */
+};
+typedef enum _CamelMimePartEncodingType CamelMimePartEncodingType; /* convenience alias for the enum tag above */
+
+
 /* Do not change these values directly, you would regret it one day */
-struct _CamelMimePart {
+struct _CamelMimePart
+{
        CamelMedium parent_object;
 
        CamelContentType *content_type;
index b58bcbe..b949ffc 100644 (file)
@@ -42,18 +42,6 @@ extern "C" {
 #define CAMEL_UUDECODE_STATE_END    (1 << 17)
 #define CAMEL_UUDECODE_STATE_MASK   (CAMEL_UUDECODE_STATE_BEGIN | CAMEL_UUDECODE_STATE_END)
 
-/* note, if you change this, make sure you change the 'encodings' array in camel-mime-part.c */
-typedef enum _CamelMimePartEncodingType {
-       CAMEL_MIME_PART_ENCODING_DEFAULT,
-       CAMEL_MIME_PART_ENCODING_7BIT,
-       CAMEL_MIME_PART_ENCODING_8BIT,
-       CAMEL_MIME_PART_ENCODING_BASE64,
-       CAMEL_MIME_PART_ENCODING_QUOTEDPRINTABLE,
-       CAMEL_MIME_PART_ENCODING_BINARY,
-       CAMEL_MIME_PART_ENCODING_UUENCODE,
-       CAMEL_MIME_PART_NUM_ENCODINGS
-} CamelMimePartEncodingType;
-
 /* a list of references for this message */
 struct _header_references {
        struct _header_references *next;