src/plugin_xmms/charset.c

   1 /* libxmms-flac - XMMS FLAC input plugin
   2  * Copyright (C) 2002  Daisuke Shimamura
   3  *
   4  * Almost from charset.c
   5  *  EasyTAG - Tag editor for MP3 and OGG files
   6  *  Copyright (C) 1999-2001  Håvard Kvålen <havardk@xmms.org>
   7  *
   8  * This program is free software; you can redistribute it and/or
   9  * modify it under the terms of the GNU General Public License
  10  * as published by the Free Software Foundation; either version 2
  11  * of the License, or (at your option) any later version.
  12  *
  13  * This program is distributed in the hope that it will be useful,
  14  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  * GNU General Public License for more details.
  17  *
  18  * You should have received a copy of the GNU General Public License
  19  * along with this program; if not, write to the Free Software
  20  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
  21  */
  22
  23 #include <stdlib.h>
  24 #include <glib.h>
  25 #include <string.h>
  26 #include <errno.h>
  27
  28 #ifdef HAVE_ICONV
  29 #include <iconv.h>
  30 #endif
  31
  32 #ifdef HAVE_LANGINFO_CODESET
  33 #include <langinfo.h>
  34 #endif
  35
  36 #include "charset.h"
  37 #include "mylocale.h"
  38 #include "configure.h"
  39
  40
  41 /****************
  42  * Declarations *
  43  ****************/
  44
  45 #define CHARSET_TRANS_ARRAY_LEN ( sizeof(charset_trans_array) / sizeof((charset_trans_array)[0]) )
  46 const CharsetInfo charset_trans_array[] = {
  47     {N_("Arabic (IBM-864)"),                  "IBM864"        },
  48     {N_("Arabic (ISO-8859-6)"),               "ISO-8859-6"    },
  49     {N_("Arabic (Windows-1256)"),             "windows-1256"  },
  50     {N_("Baltic (ISO-8859-13)"),              "ISO-8859-13"   },
  51     {N_("Baltic (ISO-8859-4)"),               "ISO-8859-4"    },
  52     {N_("Baltic (Windows-1257)"),             "windows-1257"  },
  53     {N_("Celtic (ISO-8859-14)"),              "ISO-8859-14"   },
  54     {N_("Central European (IBM-852)"),        "IBM852"        },
  55     {N_("Central European (ISO-8859-2)"),     "ISO-8859-2"    },
  56     {N_("Central European (Windows-1250)"),   "windows-1250"  },
  57     {N_("Chinese Simplified (GB18030)"),      "gb18030"       },
  58     {N_("Chinese Simplified (GB2312)"),       "GB2312"        },
  59     {N_("Chinese Traditional (Big5)"),        "Big5"          },
  60     {N_("Chinese Traditional (Big5-HKSCS)"),  "Big5-HKSCS"    },
  61     {N_("Cyrillic (IBM-855)"),                "IBM855"        },
  62     {N_("Cyrillic (ISO-8859-5)"),             "ISO-8859-5"    },
  63     {N_("Cyrillic (ISO-IR-111)"),             "ISO-IR-111"    },
  64     {N_("Cyrillic (KOI8-R)"),                 "KOI8-R"        },
  65     {N_("Cyrillic (Windows-1251)"),           "windows-1251"  },
  66     {N_("Cyrillic/Russian (CP-866)"),         "IBM866"        },
  67     {N_("Cyrillic/Ukrainian (KOI8-U)"),       "KOI8-U"        },
  68     {N_("English (US-ASCII)"),                "us-ascii"      },
  69     {N_("Greek (ISO-8859-7)"),                "ISO-8859-7"    },
  70     {N_("Greek (Windows-1253)"),              "windows-1253"  },
  71     {N_("Hebrew (IBM-862)"),                  "IBM862"        },
  72     {N_("Hebrew (Windows-1255)"),             "windows-1255"  },
  73     {N_("Japanese (EUC-JP)"),                 "EUC-JP"        },
  74     {N_("Japanese (ISO-2022-JP)"),            "ISO-2022-JP"   },
  75     {N_("Japanese (Shift_JIS)"),              "Shift_JIS"     },
  76     {N_("Korean (EUC-KR)"),                   "EUC-KR"        },
  77     {N_("Nordic (ISO-8859-10)"),              "ISO-8859-10"   },
  78     {N_("South European (ISO-8859-3)"),       "ISO-8859-3"    },
  79     {N_("Thai (TIS-620)"),                    "TIS-620"       },
  80     {N_("Turkish (IBM-857)"),                 "IBM857"        },
  81     {N_("Turkish (ISO-8859-9)"),              "ISO-8859-9"    },
  82     {N_("Turkish (Windows-1254)"),            "windows-1254"  },
  83     {N_("Unicode (UTF-7)"),                   "UTF-7"         },
  84     {N_("Unicode (UTF-8)"),                   "UTF-8"         },
  85     {N_("Unicode (UTF-16BE)"),                "UTF-16BE"      },
  86     {N_("Unicode (UTF-16LE)"),                "UTF-16LE"      },
  87     {N_("Unicode (UTF-32BE)"),                "UTF-32BE"      },
  88     {N_("Unicode (UTF-32LE)"),                "UTF-32LE"      },
  89     {N_("Vietnamese (VISCII)"),               "VISCII"        },
  90     {N_("Vietnamese (Windows-1258)"),         "windows-1258"  },
  91     {N_("Visual Hebrew (ISO-8859-8)"),        "ISO-8859-8"    },
  92     {N_("Western (IBM-850)"),                 "IBM850"        },
  93     {N_("Western (ISO-8859-1)"),              "ISO-8859-1"    },
  94     {N_("Western (ISO-8859-15)"),             "ISO-8859-15"   },
  95     {N_("Western (Windows-1252)"),            "windows-1252"  }
  96
  97     /*
  98      * From this point, character sets aren't supported by iconv
  99      */
 100 /*    {N_("Arabic (IBM-864-I)"),                "IBM864i"              },
 101     {N_("Arabic (ISO-8859-6-E)"),             "ISO-8859-6-E"         },
 102     {N_("Arabic (ISO-8859-6-I)"),             "ISO-8859-6-I"         },
 103     {N_("Arabic (MacArabic)"),                "x-mac-arabic"         },
 104     {N_("Armenian (ARMSCII-8)"),              "armscii-8"            },
 105     {N_("Central European (MacCE)"),          "x-mac-ce"             },
 106     {N_("Chinese Simplified (GBK)"),          "x-gbk"                },
 107     {N_("Chinese Simplified (HZ)"),           "HZ-GB-2312"           },
 108     {N_("Chinese Traditional (EUC-TW)"),      "x-euc-tw"             },
 109     {N_("Croatian (MacCroatian)"),            "x-mac-croatian"       },
 110     {N_("Cyrillic (MacCyrillic)"),            "x-mac-cyrillic"       },
 111     {N_("Cyrillic/Ukrainian (MacUkrainian)"), "x-mac-ukrainian"      },
 112     {N_("Farsi (MacFarsi)"),                  "x-mac-farsi"},
 113     {N_("Greek (MacGreek)"),                  "x-mac-greek"          },
 114     {N_("Gujarati (MacGujarati)"),            "x-mac-gujarati"       },
 115     {N_("Gurmukhi (MacGurmukhi)"),            "x-mac-gurmukhi"       },
 116     {N_("Hebrew (ISO-8859-8-E)"),             "ISO-8859-8-E"         },
 117     {N_("Hebrew (ISO-8859-8-I)"),             "ISO-8859-8-I"         },
 118     {N_("Hebrew (MacHebrew)"),                "x-mac-hebrew"         },
 119     {N_("Hindi (MacDevanagari)"),             "x-mac-devanagari"     },
 120     {N_("Icelandic (MacIcelandic)"),          "x-mac-icelandic"      },
 121     {N_("Korean (JOHAB)"),                    "x-johab"              },
 122     {N_("Korean (UHC)"),                      "x-windows-949"        },
 123     {N_("Romanian (MacRomanian)"),            "x-mac-romanian"       },
 124     {N_("Turkish (MacTurkish)"),              "x-mac-turkish"        },
 125     {N_("User Defined"),                      "x-user-defined"       },
 126     {N_("Vietnamese (TCVN)"),                 "x-viet-tcvn5712"      },
 127     {N_("Vietnamese (VPS)"),                  "x-viet-vps"           },
 128     {N_("Western (MacRoman)"),                "x-mac-roman"          },
 129     // charsets whithout posibly translatable names
 130     {"T61.8bit",                              "T61.8bit"             },
 131     {"x-imap4-modified-utf7",                 "x-imap4-modified-utf7"},
 132     {"x-u-escaped",                           "x-u-escaped"          },
 133     {"windows-936",                           "windows-936"          }
 134 */
 135 };
 136
 137 /*************
 138  * Functions *
 139  *************/
 140
 141 char* get_current_charset (void)
 142 {
 143     char *charset = getenv("CHARSET");
 144
 145 #ifdef HAVE_LANGINFO_CODESET
 146     if (!charset)
 147         charset = nl_langinfo(CODESET);
 148 #endif
 149     if (!charset)
 150         charset = "ISO-8859-1";
 151
 152     return charset;
 153 }
 154
 155
 156 #ifdef HAVE_ICONV
 157 static char* convert_string (const char *string, char *from, char *to)
 158 {
 159     size_t outleft, outsize, length;
 160     iconv_t cd;
 161     char *out, *outptr;
 162     const char *input = string;
 163
 164     if (!string)
 165         return NULL;
 166
 167     length = strlen(string);
 168
 169     /*  g_message("converting %s from %s to %s", string, from, to); */
 170     if ((cd = iconv_open(to, from)) == (iconv_t)-1)
 171     {
 172         g_warning("convert_string(): Conversion not supported. Charsets: %s -> %s", from, to);
 173         return g_strdup(string);
 174     }
 175
 176     /* Due to a GLIBC bug, round outbuf_size up to a multiple of 4 */
 177     /* + 1 for nul in case len == 1 */
 178     outsize = ((length + 3) & ~3) + 1;
 179     out = g_malloc(outsize);
 180     outleft = outsize - 1;
 181     outptr = out;
 182
 183  retry:
 184     if (iconv(cd, &input, &length, &outptr, &outleft) == -1)
 185     {
 186         int used;
 187         switch (errno)
 188         {
 189             case E2BIG:
 190                 used = outptr - out;
 191                 outsize = (outsize - 1) * 2 + 1;
 192                 out = g_realloc(out, outsize);
 193                 outptr = out + used;
 194                 outleft = outsize - 1 - used;
 195                 goto retry;
 196             case EINVAL:
 197                 break;
 198             case EILSEQ:
 199                 /* Invalid sequence, try to get the
 200                                    rest of the string */
 201                 input++;
 202                 length = strlen(input);
 203                 goto retry;
 204             default:
 205                 g_warning("convert_string(): Conversion failed. Inputstring: %s; Error: %s", string, strerror(errno));
 206                 break;
 207         }
 208     }
 209     *outptr = '\0';
 210
 211     iconv_close(cd);
 212     return out;
 213 }
 214 #else
 215 static char* convert_string (const char *string, char *from, char *to)
 216 {
 217     if (!string)
 218         return NULL;
 219     return g_strdup(string);
 220 }
 221 #endif
 222
 223 /*
  224  * Common conversion functions
 225  */
 226 char* convert_from_file_to_user (const char *string)
 227 {
 228     char *file_charset = flac_cfg.file_char_set;
 229     char *user_charset = flac_cfg.user_char_set;
 230
 231     return convert_string(string,file_charset,user_charset);
 232 }
 233
 234 char* convert_from_user_to_file (const char *string)
 235 {
 236     char *file_charset = flac_cfg.file_char_set;
 237     char *user_charset = flac_cfg.user_char_set;
 238
 239     return convert_string(string,user_charset,file_charset);
 240 }
 241
 242
 243 GList *Charset_Create_List (void)
 244 {
 245     GList *list = NULL;
 246     guint i;
 247
 248     for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
 249         list = g_list_append(list,_(charset_trans_array[i].charset_title));
 250     return list;
 251 }
 252
 253
 254 /*
 255  * Return charset_name from charset_title
 256  */
 257 gchar *Charset_Get_Name_From_Title (gchar *charset_title)
 258 {
 259     guint i;
 260
 261     if (charset_title)
 262         for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
 263             if ( strcasecmp(_(charset_title),_(charset_trans_array[i].charset_title)) == 0 )
 264                 return charset_trans_array[i].charset_name;
 265     return "";
 266 }
 267
 268
 269 /*
 270  * Return charset_title from charset_name
 271  */
 272 gchar *Charset_Get_Title_From_Name (gchar *charset_name)
 273 {
 274     guint i;
 275
 276     if (charset_name)
 277         for (i=0; i<CHARSET_TRANS_ARRAY_LEN; i++)
 278             if ( strcasecmp(charset_name,charset_trans_array[i].charset_name) == 0 )
 279                 return _(charset_trans_array[i].charset_title);
 280     return "";
 281 }
 282
 283
 284
 285 /*
  286  * Test if the conversion is supported between two character sets ('from' and 'to')
 287  */
 288 #ifdef HAVE_ICONV
 289 gboolean test_conversion_charset (char *from, char *to)
 290 {
 291     iconv_t cd;
 292
 293     if ((cd=iconv_open(to,from)) == (iconv_t)-1)
 294     {
 295         /* Conversion not supported */
 296         return FALSE;
 297     }
 298     iconv_close(cd);
 299     return TRUE;
 300 }
 301 #else
 302 gboolean test_conversion_charset (char *from, char *to)
 303 {
 304     return TRUE;
 305 }
 306 #endif