Remove my iso8859-1 -> iso-8859-1 hack and use camel_charset_get_iconv_friendly_name() instead.
author Jeffrey Stedfast <fejj@ximian.com>
Thu, 19 Jul 2001 21:00:27 +0000 (21:00 +0000)
committer Jeffrey Stedfast <fejj@src.gnome.org>
Thu, 19 Jul 2001 21:00:27 +0000 (21:00 +0000)
2001-07-19  Jeffrey Stedfast  <fejj@ximian.com>

* camel-mime-utils.c (rfc2047_decode_word): Remove my iso8859-1 ->
iso-8859-1 hack and use camel_charset_get_iconv_friendly_name()
instead.
(rfc2184_decode): Use camel_charset_get_iconv_friendly_name()

* camel.c (camel_init): Call camel_charset_map_init().

* camel-charset-map.c (camel_charset_map_init): New function to
initialize the charset-equivalent lookup table. To be called by
camel_init().
(camel_charset_get_iconv_friendly_name): New function to try and
convert a charset into something that iconv is more likely to
accept.

camel/ChangeLog
camel/camel-charset-map.c
camel/camel-charset-map.h
camel/camel-mime-utils.c
camel/camel.c

diff --git a/camel/ChangeLog b/camel/ChangeLog
index 413c00c..f3c3f41 100644
--- a/camel/ChangeLog
+++ b/camel/ChangeLog
@@ -1,3 +1,19 @@
+2001-07-19  Jeffrey Stedfast  <fejj@ximian.com>
+
+       * camel-mime-utils.c (rfc2047_decode_word): Remove my iso8859-1 ->
+       iso-8859-1 hack and use camel_charset_get_iconv_friendly_name()
+       instead.
+       (rfc2184_decode): Use camel_charset_get_iconv_friendly_name()
+
+       * camel.c (camel_init): Call camel_charset_map_init().
+
+       * camel-charset-map.c (camel_charset_map_init): New function to
+       initialize the charset-equivalent lookup table. To be called by
+       camel_init().
+       (camel_charset_get_iconv_friendly_name): New function to try and
+       convert a charset into something that iconv is more likely to
+       accept.
+
 2001-07-19  Peter Williams  <peterw@ximian.com>
 
        * Revert 7/11/2001 patch for IMAP INBOX filtering at NotZed's 
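diff --git a/camel/camel-charset-map.c b/camel/camel-charset-map.c
index 236197e..d609321 100644
--- a/camel/camel-charset-map.c
+++ b/camel/camel-charset-map.c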
@@ -197,19 +197,85 @@ void main(void)
 
 #include "camel-charset-map.h"
 #include "camel-charset-map-private.h"
+#include "hash-table-utils.h"
 #include <gal/unicode/gunicode.h>
 #include <locale.h>
 #include <string.h>
 #include <glib.h>
+#ifdef ENABLE_THREADS
+#include <pthread.h>
+#endif
+
+
+#ifdef ENABLE_THREADS
+static pthread_mutex_t iconv_charsets_lock = PTHREAD_MUTEX_INITIALIZER;
+#define ICONV_CHARSETS_LOCK() pthread_mutex_lock (&iconv_charsets_lock)
+#define ICONV_CHARSETS_UNLOCK() pthread_mutex_unlock (&iconv_charsets_lock)
+#else
+#define ICONV_CHARSETS_LOCK()
+#define ICONV_CHARSETS_UNLOCK()
+#endif /* ENABLE_THREADS */
+
+static GHashTable *iconv_charsets = NULL;
+
+struct {
+       char *charset;
+       char *iconv_name;
+} known_iconv_charsets[] = {
+       /* charset name, iconv-friendly charset name */
+       { "iso-8859-1",     "iso-8859-1" },
+       { "iso8859-1",      "iso-8859-1" },
+       /* the above mostly serves as an example for iso-style charsets,
+          but we have code that will populate the iso-*'s if/when they
+          show up in camel_charset_get_iconv_friendly_name() so I'm
+          not going to bother putting them all in here... */
+       { "windows-cp1251", "cp1251"     },
+       { "windows-1251",   "cp1251"     },
+       { "cp1251",         "cp1251"     },
+       { NULL,             NULL         }
+};
+
+
+static void
+shutdown_foreach (gpointer key, gpointer value, gpointer data)
+{
+       g_free (key);
+       g_free (value);
+}
+
+static void
+camel_charset_map_shutdown (void)
+{
+       g_hash_table_foreach (iconv_charsets, shutdown_foreach, NULL);
+       g_hash_table_destroy (iconv_charsets);
+}
+
+void
+camel_charset_map_init (void)
+{
+       int i;
+       
+       if (iconv_charsets)
+               return;
+       
+       iconv_charsets = g_hash_table_new (g_strcase_hash, g_strcase_equal);
+       for (i = 0; known_iconv_charsets[i].charset != NULL; i++) {
+               g_hash_table_insert (iconv_charsets, g_strdup (known_iconv_charsets[i].charset),
+                                    g_strdup (known_iconv_charsets[i].iconv_name));
+       }
+       
+       g_atexit (camel_charset_map_shutdown);
+}
 
-void camel_charset_init(CamelCharset *c)
+void
+camel_charset_init (CamelCharset *c)
 {
        c->mask = ~0;
        c->level = 0;
 }
 
 void
-camel_charset_step(CamelCharset *c, const char *in, int len)
+camel_charset_step (CamelCharset *c, const char *in, int len)
 {
        register unsigned int mask;
        register int level;
@@ -260,7 +326,8 @@ camel_charset_best_mask(unsigned int mask)
        return "UTF-8";
 }
 
-const char *camel_charset_best_name(CamelCharset *charset)
+const char *
+camel_charset_best_name(CamelCharset *charset)
 {
        if (charset->level == 1)
                return "ISO-8859-1";
@@ -317,5 +384,39 @@ camel_charset_locale_name (void)
        return charset;
 }
 
+const char *
+camel_charset_get_iconv_friendly_name (const char *name)
+{
+       const char *charset;
+       
+       ICONV_CHARSETS_LOCK ();
+       charset = g_hash_table_lookup (iconv_charsets, name);
+       if (!charset) {
+               /* Attempt to friendlyify the charset */
+               char *new_charset;
+               int len;
+               
+               /* Hack to convert charsets like ISO8859-1 to iconv-friendly ISO-8859-1 */
+               if (!g_strncasecmp (name, "iso", 3) && name[3] != '-' && name[3] != '_') {
+                       len = strlen (name);
+                       new_charset = g_malloc (len + 2);
+                       memcpy (new_charset, name, 3);
+                       new_charset[3] = '-';
+                       memcpy (new_charset + 4, name + 3, len - 3);
+                       new_charset[len + 1] = '\0';
+                       g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset);
+               } else {
+                       /* *shrug* - add it to the hash table just the way it is? */
+                       new_charset = g_strdup (name);
+                       g_hash_table_insert (iconv_charsets, g_strdup (name), new_charset);
+               }
+               
+               charset = new_charset;
+       }
+       ICONV_CHARSETS_UNLOCK ();
+       
+       return charset;
+}
+
 #endif /* !BUILD_MAP */
 
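diff --git a/camel/camel-charset-map.h b/camel/camel-charset-map.h
index 54d62f8..47b3cc0 100644
--- a/camel/camel-charset-map.h
+++ b/camel/camel-charset-map.h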
@@ -28,6 +28,8 @@ struct _CamelCharset {
        int level;
 };
 
+void camel_charset_map_init (void);
+
 void camel_charset_init(CamelCharset *);
 void camel_charset_step(CamelCharset *, const char *in, int len);
 const char *camel_charset_best_name(CamelCharset *);
@@ -37,4 +39,6 @@ const char *camel_charset_best(const char *in, int len);
 
 char *camel_charset_locale_name (void);
 
+const char *camel_charset_get_iconv_friendly_name (const char *name);
+
 #endif /* ! _CAMEL_CHARSET_MAP_H */
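diff --git a/camel/camel-mime-utils.c b/camel/camel-mime-utils.c
index 357c183..ba704b1 100644
--- a/camel/camel-mime-utils.c
+++ b/camel/camel-mime-utils.c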
@@ -904,6 +904,7 @@ rfc2047_decode_word(const char *in, int len)
        const char *inptr = in+2;
        const char *inend = in+len-2;
        const char *inbuf;
+       const char *charset;
        char *encname;
        int tmplen;
        int ret;
@@ -949,19 +950,12 @@ rfc2047_decode_word(const char *in, int len)
                if (inlen > 0) {
                        /* yuck, all this snot is to setup iconv! */
                        tmplen = inptr - in - 3;
-                       encname = alloca (tmplen + 2);
-                       
-                       /* Hack to convert charsets like ISO8859-1 to iconv-friendly ISO-8859-1 */
-                       if (!g_strncasecmp (in + 2, "iso", 3) && *(in + 5) != '-') {
-                               memcpy (encname, in + 2, 3);
-                               encname[3] = '-';
-                               memcpy (encname + 4, in + 5, tmplen - 3);
-                               tmplen++;
-                       } else {
-                               memcpy (encname, in + 2, tmplen);
-                       }
+                       encname = alloca (tmplen + 1);
+                       memcpy (encname, in + 2, tmplen);
                        encname[tmplen] = '\0';
                        
+                       charset = camel_charset_get_iconv_friendly_name (encname);
+                       
                        inbuf = decword;
                        
                        outlen = inlen * 6 + 16;
@@ -969,27 +963,27 @@ rfc2047_decode_word(const char *in, int len)
                        outbuf = outbase;
                        
                        /* TODO: Should this cache iconv converters? */
-                       ic = iconv_open ("UTF-8", encname);
+                       ic = iconv_open ("UTF-8", charset);
                        if (ic != (iconv_t)-1) {
                                ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen);
-                               if (ret>=0) {
+                               if (ret >= 0) {
                                        iconv (ic, NULL, 0, &outbuf, &outlen);
                                        *outbuf = 0;
                                        decoded = g_strdup (outbase);
                                }
                                iconv_close (ic);
                        } else {
-                               w(g_warning("Cannot decode charset, header display may be corrupt: %s: %s",
-                                           encname, strerror(errno)));
+                               w(g_warning ("Cannot decode charset, header display may be corrupt: %s: %s",
+                                            charset, g_strerror (errno)));
                                /* TODO: Should this do this, or just leave the encoded strings? */
                                decword[inlen] = 0;
-                               decoded = g_strdup(decword);
+                               decoded = g_strdup (decword);
                        }
                }
        }
-
+       
        d(printf("decoded '%s'\n", decoded));
-
+       
        return decoded;
 }
 
@@ -1166,16 +1160,16 @@ rfc2047_encode_word(GString *outstring, const char *in, int len, const char *typ
        d(printf("Converting [%d] '%.*s' to %s\n", len, len, in, type));
 
        /* convert utf8->encoding */
-       bufflen = len*6+16;
-       buffer = alloca(bufflen);
+       bufflen = len * 6 + 16;
+       buffer = alloca (bufflen);
        inlen = len;
        inptr = in;
-
-       ascii = alloca(bufflen);
-
-       if (strcasecmp(type, "UTF-8") != 0)
-               ic = iconv_open(type, "UTF-8");
-
+       
+       ascii = alloca (bufflen);
+       
+       if (g_strcasecmp (type, "UTF-8") != 0)
+               ic = iconv_open (type, "UTF-8");
+       
        while (inlen) {
                int convlen, i, proclen;
 
@@ -1802,6 +1796,7 @@ rfc2184_decode (const char *in, int len)
 {
        const char *inptr = in;
        const char *inend = in + len;
+       const char *charset;
        char *decoded = NULL;
        char *encoding;
        
@@ -1810,11 +1805,12 @@ rfc2184_decode (const char *in, int len)
                return NULL;
        
        encoding = g_strndup (in, inptr - in);
+       charset = camel_charset_get_iconv_friendly_name (encoding);
+       g_free (encoding);
+       
        inptr = memchr (inptr + 1, '\'', inend - inptr - 1);
-       if (!inptr) {
-               g_free (encoding);
+       if (!inptr)
                return NULL;
-       }
        
        inptr++;
        if (inptr < inend) {
@@ -1825,14 +1821,14 @@ rfc2184_decode (const char *in, int len)
                
                inbuf = decword = hex_decode (inptr, inend - inptr);
                inlen = strlen (inbuf);
-                               
-               ic = iconv_open ("UTF-8", encoding);
+               
+               ic = iconv_open ("UTF-8", charset);
                if (ic != (iconv_t) -1) {
                        int ret;
-               
+                       
                        outlen = inlen * 6 + 16;
                        outbuf = outbase = g_malloc (outlen);
-       
+                       
                        ret = iconv (ic, &inbuf, &inlen, &outbuf, &outlen);
                        if (ret >= 0) {
                                iconv (ic, NULL, 0, &outbuf, &outlen);
diff --git a/camel/camel.c b/camel/camel.c
index aaf31ca..4a7c8fd 100644
--- a/camel/camel.c
+++ b/camel/camel.c
@@ -35,6 +35,7 @@
 #endif /* HAVE_NSS */
 
 #include "camel.h"
+#include "camel-charset-map.h"
 
 gboolean camel_verbose_debug = FALSE;
 
@@ -62,6 +63,8 @@ camel_init (const char *configdir, gboolean nss_init)
        if (getenv ("CAMEL_VERBOSE_DEBUG"))
                camel_verbose_debug = TRUE;
        
+       camel_charset_map_init ();
+       
 #ifdef HAVE_NSS
        if (nss_init) {
                PR_Init (PR_SYSTEM_THREAD, PR_PRIORITY_NORMAL, 10);