Corrected.

[platform/upstream/glib.git] / gconvert.c
diff --git a/gconvert.c b/gconvert.c

index da67e6e..2a4cbeb 100644 (file)
--- a/gconvert.c
+++ b/gconvert.c
@@ -25,14 +25,16 @@
  #include <string.h>
  #include <stdlib.h>
  
-#ifdef G_OS_WIN32
-#include <windows.h>
-#endif
-
  #include "glib.h"
  #include "config.h"
  
-#define _(s) (s)
+#ifdef G_PLATFORM_WIN32
+#define STRICT
+#include <windows.h>
+#undef STRICT
+#endif
+
+#include "glibintl.h"
  
  GQuark 
  g_convert_error_quark()
@@ -51,6 +53,20 @@ g_convert_error_quark()
  #error libiconv not in use but included iconv.h is from libiconv
  #endif
  
+/**
+ * g_iconv_open:
+ * @to_codeset: destination codeset
+ * @from_codeset: source codeset
+ * 
+ * Same as the standard UNIX routine iconv_open(), but
+ * may be implemented via libiconv on UNIX flavors that lack
+ * a native implementation.
+ * 
+ * GLib provides g_convert() and g_locale_to_utf8() which are likely
+ * more convenient than the raw iconv wrappers.
+ * 
+ * Return value: a "conversion descriptor"
+ **/
  GIConv
  g_iconv_open (const gchar  *to_codeset,
               const gchar  *from_codeset)
@@ -60,18 +76,50 @@ g_iconv_open (const gchar  *to_codeset,
    return (GIConv)cd;
  }
  
+/**
+ * g_iconv:
+ * @converter: conversion descriptor from g_iconv_open()
+ * @inbuf: bytes to convert
+ * @inbytes_left: inout parameter, bytes remaining to convert in @inbuf
+ * @outbuf: converted output bytes
+ * @outbytes_left: inout parameter, bytes available to fill in @outbuf
+ * 
+ * Same as the standard UNIX routine iconv(), but
+ * may be implemented via libiconv on UNIX flavors that lack
+ * a native implementation.
+ *
+ * GLib provides g_convert() and g_locale_to_utf8() which are likely
+ * more convenient than the raw iconv wrappers.
+ * 
+ * Return value: count of non-reversible conversions, or -1 on error
+ **/
  size_t 
  g_iconv (GIConv   converter,
          gchar  **inbuf,
-        size_t  *inbytes_left,
+        gsize   *inbytes_left,
          gchar  **outbuf,
-        size_t  *outbytes_left)
+        gsize   *outbytes_left)
  {
    iconv_t cd = (iconv_t)converter;
  
    return iconv (cd, inbuf, inbytes_left, outbuf, outbytes_left);
  }
  
+/**
+ * g_iconv_close:
+ * @converter: a conversion descriptor from g_iconv_open()
+ *
+ * Same as the standard UNIX routine iconv_close(), but
+ * may be implemented via libiconv on UNIX flavors that lack
+ * a native implementation. Should be called to clean up
+ * the conversion descriptor from g_iconv_open() when
+ * you are done converting things.
+ *
+ * GLib provides g_convert() and g_locale_to_utf8() which are likely
+ * more convenient than the raw iconv wrappers.
+ * 
+ * Return value: -1 on error, 0 on success
+ **/
  gint
  g_iconv_close (GIConv converter)
  {
@@ -80,7 +128,7 @@ g_iconv_close (GIConv converter)
    return iconv_close (cd);
  }
  
-GIConv
+static GIConv
  open_converter (const gchar *to_codeset,
                  const gchar *from_codeset,
                 GError     **error)
@@ -92,7 +140,7 @@ open_converter (const gchar *to_codeset,
        /* Something went wrong.  */
        if (errno == EINVAL)
          g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_NO_CONVERSION,
-                     _("Conversion from character set `%s' to `%s' is not suppo\rted"),
+                     _("Conversion from character set `%s' to `%s' is not supported"),
                       from_codeset, to_codeset);
        else
          g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_FAILED,
@@ -131,21 +179,21 @@ open_converter (const gchar *to_codeset,
   **/
  gchar*
  g_convert (const gchar *str,
-           gint         len,
+           gssize       len,  
             const gchar *to_codeset,
             const gchar *from_codeset,
-           gint        *bytes_read,
-          gint        *bytes_written,
+           gsize       *bytes_read, 
+          gsize       *bytes_written, 
            GError     **error)
  {
    gchar *dest;
    gchar *outp;
    const gchar *p;
-  size_t inbytes_remaining;
-  size_t outbytes_remaining;
-  size_t err;
+  gsize inbytes_remaining;
+  gsize outbytes_remaining;
+  gsize err;
    GIConv cd;
-  size_t outbuf_size;
+  gsize outbuf_size;
    gboolean have_error = FALSE;
    
    g_return_val_if_fail (str != NULL, NULL);
@@ -171,6 +219,7 @@ g_convert (const gchar *str,
    p = str;
    inbytes_remaining = len;
    outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
+  
    outbytes_remaining = outbuf_size - 1; /* -1 for nul */
    outp = dest = g_malloc (outbuf_size);
  
@@ -188,9 +237,10 @@ g_convert (const gchar *str,
         case E2BIG:
           {
             size_t used = outp - dest;
+
             outbuf_size *= 2;
             dest = g_realloc (dest, outbuf_size);
-
+               
             outp = dest + used;
             outbytes_remaining = outbuf_size - used - 1; /* -1 for nul */
  
@@ -220,9 +270,12 @@ g_convert (const gchar *str,
      {
        if ((p - str) != len) 
         {
-         g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
-                      _("Partial character sequence at end of input"));
-         have_error = TRUE;
+          if (!have_error)
+            {
+              g_set_error (error, G_CONVERT_ERROR, G_CONVERT_ERROR_PARTIAL_INPUT,
+                           _("Partial character sequence at end of input"));
+              have_error = TRUE;
+            }
         }
      }
  
@@ -253,10 +306,7 @@ g_convert (const gchar *str,
   *                input string that were successfully converted, or %NULL.
   *                Even if the conversion was succesful, this may be 
   *                less than len if there were partial characters
- *                at the end of the input. If the error
- *                G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
- *                stored will the byte fofset after the last valid
- *                input sequence.
+ *                at the end of the input.
   * @bytes_written: the stored in the output buffer (not including the
   *                 terminating nul.
   * @error:        location to store the error occuring, or %NULL to ignore
@@ -276,12 +326,12 @@ g_convert (const gchar *str,
   **/
  gchar*
  g_convert_with_fallback (const gchar *str,
-                        gint         len,
+                        gssize       len,    
                          const gchar *to_codeset,
                          const gchar *from_codeset,
                          gchar       *fallback,
-                        gint        *bytes_read,
-                        gint        *bytes_written,
+                        gsize       *bytes_read,
+                        gsize       *bytes_written,
                          GError     **error)
  {
    gchar *utf8;
@@ -289,13 +339,13 @@ g_convert_with_fallback (const gchar *str,
    gchar *outp;
    const gchar *insert_str = NULL;
    const gchar *p;
-  int inbytes_remaining;
+  gsize inbytes_remaining;   
    const gchar *save_p = NULL;
-  size_t save_inbytes = 0;
-  size_t outbytes_remaining;
-  size_t err;
+  gsize save_inbytes = 0;
+  gsize outbytes_remaining; 
+  gsize err;
    GIConv cd;
-  size_t outbuf_size;
+  gsize outbuf_size;
    gboolean have_error = FALSE;
    gboolean done = FALSE;
  
@@ -324,6 +374,8 @@ g_convert_with_fallback (const gchar *str,
    else
      g_error_free (local_error);
  
+  local_error = NULL;
+  
    /* No go; to proceed, we need a converter from "UTF-8" to
     * to_codeset, and the string as UTF-8.
     */
@@ -353,6 +405,7 @@ g_convert_with_fallback (const gchar *str,
     * for the original string while we are converting the fallback
     */
    p = utf8;
+
    outbuf_size = len + 1; /* + 1 for nul in case len == 1 */
    outbytes_remaining = outbuf_size - 1; /* -1 for nul */
    outp = dest = g_malloc (outbuf_size);
@@ -373,6 +426,7 @@ g_convert_with_fallback (const gchar *str,
             case E2BIG:
               {
                 size_t used = outp - dest;
+
                 outbuf_size *= 2;
                 dest = g_realloc (dest, outbuf_size);
                 
@@ -458,46 +512,101 @@ g_convert_with_fallback (const gchar *str,
  /*
   * g_locale_to_utf8
   *
+ * 
+ */
+/* Copy string up to len bytes or its first nul, reporting the copied length. */
+static gchar *
+strdup_len (const gchar *string,
+           gssize       len,            /* byte limit; negative means use strlen() */
+           gsize       *bytes_written,  /* optional out: receives the copied length */
+           gsize       *bytes_read)     /* optional out: receives the same length */
+        
+{
+  gsize real_len;
+
+  if (len < 0)
+    real_len = strlen (string);
+  else
+    {
+      real_len = 0;
+      /* Stop at an embedded nul so real_len never runs past the string's end. */
+      while (real_len < len && string[real_len])
+       real_len++;
+    }
+  /* Callers here pass bytes_read first; both get real_len, so order is moot. */
+  if (bytes_read)
+    *bytes_read = real_len;
+  if (bytes_written)
+    *bytes_written = real_len;
+
+  return g_strndup (string, real_len);
+}
+
+/**
+ * g_locale_to_utf8:
+ * @opsysstring:   a string in the encoding of the current locale
+ * @len:           the length of the string, or -1 if the string is
+ *                 nul-terminated.
+ * @bytes_read:    location to store the number of bytes in the
+ *                 input string that were successfully converted, or %NULL.
+ *                 Even if the conversion was successful, this may be 
+ *                 less than len if there were partial characters
+ *                 at the end of the input. If the error
+ *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
+ *                 stored will be the byte offset after the last valid
+ *                 input sequence.
+ * @bytes_written: the number of bytes stored in the output buffer (not
+ *                 including the terminating nul).
+ * @error: location to store the error occurring, or %NULL to ignore
+ *                 errors. Any of the errors in #GConvertError may occur.
+ * 
   * Converts a string which is in the encoding used for strings by
   * the C runtime (usually the same as that used by the operating
   * system) in the current locale into a UTF-8 string.
- */
-
+ * 
+ * Return value: The converted string, or %NULL on an error.
+ **/
  gchar *
-g_locale_to_utf8 (const gchar *opsysstring, GError **error)
+g_locale_to_utf8 (const gchar  *opsysstring,
+                 gssize        len,            
+                 gsize        *bytes_read,    
+                 gsize        *bytes_written,
+                 GError      **error)
  {
-#ifdef G_OS_WIN32
+#ifdef G_PLATFORM_WIN32
  
-  gint i, clen, wclen, first;
-  const gint len = strlen (opsysstring);
+  gint i, clen, total_len, wclen, first;
    wchar_t *wcs, wc;
    gchar *result, *bp;
    const wchar_t *wcp;
  
+  if (len == -1)
+    len = strlen (opsysstring);
+  
    wcs = g_new (wchar_t, len);
    wclen = MultiByteToWideChar (CP_ACP, 0, opsysstring, len, wcs, len);
  
    wcp = wcs;
-  clen = 0;
+  total_len = 0;
    for (i = 0; i < wclen; i++)
      {
        wc = *wcp++;
  
        if (wc < 0x80)
-       clen += 1;
+       total_len += 1;
        else if (wc < 0x800)
-       clen += 2;
+       total_len += 2;
        else if (wc < 0x10000)
-       clen += 3;
+       total_len += 3;
        else if (wc < 0x200000)
-       clen += 4;
+       total_len += 4;
        else if (wc < 0x4000000)
-       clen += 5;
+       total_len += 5;
        else
-       clen += 6;
+       total_len += 6;
      }
  
-  result = g_malloc (clen + 1);
+  result = g_malloc (total_len + 1);
    
    wcp = wcs;
    bp = result;
@@ -553,40 +662,68 @@ g_locale_to_utf8 (const gchar *opsysstring, GError **error)
  
    g_free (wcs);
  
+  if (bytes_read)
+    *bytes_read = len;
+  if (bytes_written)
+    *bytes_written = total_len;
+  
    return result;
  
-#else
+#else  /* !G_PLATFORM_WIN32 */
  
-  char *charset, *str;
+  const char *charset;
  
    if (g_get_charset (&charset))
-    return g_strdup (opsysstring);
+    return strdup_len (opsysstring, len, bytes_read, bytes_written);
+  else
+    return g_convert (opsysstring, len, 
+                     "UTF-8", charset, bytes_read, bytes_written, error);
  
-  str = g_convert (opsysstring, strlen (opsysstring), 
-                  "UTF-8", charset, NULL, NULL, error);
-  
-  return str;
-#endif
+#endif /* !G_PLATFORM_WIN32 */
  }
  
-/*
- * g_locale_from_utf8
- *
- * The reverse of g_locale_to_utf8.
- */
-
+/**
+ * g_locale_from_utf8:
+ * @utf8string:    a UTF-8 encoded string 
+ * @len:           the length of the string, or -1 if the string is
+ *                 nul-terminated.
+ * @bytes_read:    location to store the number of bytes in the
+ *                 input string that were successfully converted, or %NULL.
+ *                 Even if the conversion was successful, this may be 
+ *                 less than len if there were partial characters
+ *                 at the end of the input. If the error
+ *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
+ *                 stored will be the byte offset after the last valid
+ *                 input sequence.
+ * @bytes_written: the number of bytes stored in the output buffer (not
+ *                 including the terminating nul).
+ * @error: location to store the error occurring, or %NULL to ignore
+ *                 errors. Any of the errors in #GConvertError may occur.
+ * 
+ * Converts a string from UTF-8 to the encoding used for strings by
+ * the C runtime (usually the same as that used by the operating
+ * system) in the current locale.
+ * 
+ * Return value: The converted string, or %NULL on an error.
+ **/
  gchar *
-g_locale_from_utf8 (const gchar *utf8string, GError **error)
+g_locale_from_utf8 (const gchar *utf8string,
+                   gssize       len,            
+                   gsize       *bytes_read,    
+                   gsize       *bytes_written,
+                   GError     **error)
  {
-#ifdef G_OS_WIN32
+#ifdef G_PLATFORM_WIN32
  
    gint i, mask, clen, mblen;
-  const gint len = strlen (utf8string);
    wchar_t *wcs, *wcp;
    gchar *result;
    guchar *cp, *end, c;
    gint n;
    
+  if (len == -1)
+    len = strlen (utf8string);
+  
    /* First convert to wide chars */
    cp = (guchar *) utf8string;
    end = cp + len;
@@ -671,49 +808,109 @@ g_locale_from_utf8 (const gchar *utf8string, GError **error)
    result[mblen] = 0;
    g_free (wcs);
  
+  if (bytes_read)
+    *bytes_read = len;
+  if (bytes_written)
+    *bytes_written = mblen;
+  
    return result;
  
-#else
-
-  gchar *charset, *str;
+#else  /* !G_PLATFORM_WIN32 */
+  
+  const gchar *charset;
  
    if (g_get_charset (&charset))
-    return g_strdup (utf8string);
-
-  str = g_convert (utf8string, strlen (utf8string), 
-                  charset, "UTF-8", NULL, NULL, error);
+    return strdup_len (utf8string, len, bytes_read, bytes_written);
+  else
+    return g_convert (utf8string, len,
+                     charset, "UTF-8", bytes_read, bytes_written, error);
  
-  return str;
-  
-#endif
+#endif /* !G_PLATFORM_WIN32 */
  }
  
-/* Filenames are in UTF-8 unless specificially requested otherwise */
-
+/**
+ * g_filename_to_utf8:
+ * @opsysstring:   a string in the encoding for filenames
+ * @len:           the length of the string, or -1 if the string is
+ *                 nul-terminated.
+ * @bytes_read:    location to store the number of bytes in the
+ *                 input string that were successfully converted, or %NULL.
+ *                 Even if the conversion was successful, this may be 
+ *                 less than len if there were partial characters
+ *                 at the end of the input. If the error
+ *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
+ *                 stored will be the byte offset after the last valid
+ *                 input sequence.
+ * @bytes_written: the number of bytes stored in the output buffer (not
+ *                 including the terminating nul).
+ * @error: location to store the error occurring, or %NULL to ignore
+ *                 errors. Any of the errors in #GConvertError may occur.
+ * 
+ * Converts a string which is in the encoding used for filenames
+ * into a UTF-8 string.
+ * 
+ * Return value: The converted string, or %NULL on an error.
+ **/
  gchar*
-g_filename_to_utf8 (const gchar *string, GError **error)
+g_filename_to_utf8 (const gchar *opsysstring, 
+                   gssize       len,           
+                   gsize       *bytes_read,   
+                   gsize       *bytes_written,
+                   GError     **error)
  {
-#ifdef G_OS_WIN32
-  return g_locale_to_utf8 (string, error);
-#else
+#ifdef G_PLATFORM_WIN32
+  return g_locale_to_utf8 (opsysstring, len,
+                          bytes_read, bytes_written,
+                          error);
+#else  /* !G_PLATFORM_WIN32 */
    if (getenv ("G_BROKEN_FILENAMES"))
-    return g_locale_to_utf8 (string, error);
-
-  return g_strdup (string);
-#endif
+    return g_locale_to_utf8 (opsysstring, len,
+                            bytes_read, bytes_written,
+                            error);
+  else
+    return strdup_len (opsysstring, len, bytes_read, bytes_written);
+#endif /* !G_PLATFORM_WIN32 */
  }
  
+/**
+ * g_filename_from_utf8:
+ * @utf8string:    a UTF-8 encoded string 
+ * @len:           the length of the string, or -1 if the string is
+ *                 nul-terminated.
+ * @bytes_read:    location to store the number of bytes in the
+ *                 input string that were successfully converted, or %NULL.
+ *                 Even if the conversion was successful, this may be 
+ *                 less than len if there were partial characters
+ *                 at the end of the input. If the error
+ *                 G_CONVERT_ERROR_ILLEGAL_SEQUENCE occurs, the value
+ *                 stored will be the byte offset after the last valid
+ *                 input sequence.
+ * @bytes_written: the number of bytes stored in the output buffer (not
+ *                 including the terminating nul).
+ * @error: location to store the error occurring, or %NULL to ignore
+ *                 errors. Any of the errors in #GConvertError may occur.
+ * 
+ * Converts a string from UTF-8 to the encoding used for filenames.
+ * 
+ * Return value: The converted string, or %NULL on an error.
+ **/
  gchar*
-g_filename_from_utf8 (const gchar *string, GError **error)
+g_filename_from_utf8 (const gchar *utf8string,
+                     gssize       len,            
+                     gsize       *bytes_read,    
+                     gsize       *bytes_written,
+                     GError     **error)
  {
-#ifdef G_OS_WIN32
-  return g_locale_from_utf8 (string, error);
-#else
+#ifdef G_PLATFORM_WIN32
+  return g_locale_from_utf8 (utf8string, len,
+                            bytes_read, bytes_written,
+                            error);
+#else  /* !G_PLATFORM_WIN32 */
    if (getenv ("G_BROKEN_FILENAMES"))
-    return g_locale_from_utf8 (string, error);
-
-  return g_strdup (string);
-#endif
+    return g_locale_from_utf8 (utf8string, len,
+                              bytes_read, bytes_written,
+                              error);
+  else
+    return strdup_len (utf8string, len, bytes_read, bytes_written);
+#endif /* !G_PLATFORM_WIN32 */
  }
-
-