gio/gurifuncs.c

   1 /* GIO - GLib Input, Output and Streaming Library
   2  *
   3  * Copyright (C) 2006-2007 Red Hat, Inc.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General
  16  * Public License along with this library; if not, write to the
  17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  18  * Boston, MA 02111-1307, USA.
  19  *
  20  * Author: Alexander Larsson <alexl@redhat.com>
  21  */
  22
  23 #include <config.h>
  24 #include "gurifuncs.h"
  25 #include "string.h"
  26
  27 static int
  28 unescape_character (const char *scanner)
  29 {
  30   int first_digit;
  31   int second_digit;
  32
  33   first_digit = g_ascii_xdigit_value (*scanner++);
  34   if (first_digit < 0)
  35     return -1;
  36
  37   second_digit = g_ascii_xdigit_value (*scanner++);
  38   if (second_digit < 0)
  39     return -1;
  40
  41   return (first_digit << 4) | second_digit;
  42 }
  43
  44 /**
  45  * g_uri_unescape_segment:
  46  * @escaped_string: a string.
  47  * @escaped_string_end: a string.
  48  * @illegal_characters: a string of illegal characters not to be allowed.
  49  *
  50  * Returns: an unescaped version of @escaped_string or %NULL on error.
  51  * The returned string should be freed when no longer needed.
  52  **/
  53 char *
  54 g_uri_unescape_segment (const char *escaped_string,
  55                         const char *escaped_string_end,
  56                         const char *illegal_characters)
  57 {
  58   const char *in;
  59   char *out, *result;
  60   gint character;
  61
  62   if (escaped_string == NULL)
  63     return NULL;
  64
  65   if (escaped_string_end == NULL)
  66     escaped_string_end = escaped_string + strlen (escaped_string);
  67
  68   result = g_malloc (escaped_string_end - escaped_string + 1);
  69
  70   out = result;
  71   for (in = escaped_string; in < escaped_string_end; in++)
  72     {
  73       character = *in;
  74
  75       if (*in == '%')
  76         {
  77           in++;
  78
  79           if (escaped_string_end - in < 2)
  80             {
  81               /* Invalid escaped char (to short) */
  82               g_free (result);
  83               return NULL;
  84             }
  85
  86           character = unescape_character (in);
  87
  88           /* Check for an illegal character. We consider '\0' illegal here. */
  89           if (character <= 0 ||
  90               (illegal_characters != NULL &&
  91                strchr (illegal_characters, (char)character) != NULL))
  92             {
  93               g_free (result);
  94               return NULL;
  95             }
  96
  97           in++; /* The other char will be eaten in the loop header */
  98         }
  99       *out++ = (char)character;
 100     }
 101
 102   *out = '\0';
 103
 104   return result;
 105 }
 106
 107 /**
 108  * g_uri_unescape_string:
 109  * @escaped_string: an escaped string to be unescaped.
 110  * @illegal_characters: a string of illegal characters not to be allowed.
 111  *
 112  * Returns: an unescaped version of @escaped_string.
 113  *
 114  * The returned string should be freed when no longer needed
 115  *
 116  **/
 117 char *
 118 g_uri_unescape_string (const char *escaped_string,
 119                        const char *illegal_characters)
 120 {
 121   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
 122 }
 123
 124 /**
 125  * g_uri_get_scheme:
 126  * @uri: a valid URI.
 127  *
 128  * Returns: The "Scheme" component of the URI, or %NULL on error.
 129  * RFC 3986 decodes the scheme as:
 130  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 131  * Popular schemes include "file", "http", "svn", etc.
 132  *
 133  * The returned string should be freed when no longer needed.
 134  *
 135  **/
 136 char *
 137 g_uri_get_scheme (const char  *uri)
 138 {
 139   const char *p;
 140   char c;
 141
 142   g_return_val_if_fail (uri != NULL, NULL);
 143
 144   /* From RFC 3986 Decodes:
 145    * URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 146    */
 147
 148   p = uri;
 149
 150   /* Decode scheme:
 151      scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 152   */
 153
 154   if (!g_ascii_isalpha (*p))
 155     return NULL;
 156
 157   while (1)
 158     {
 159       c = *p++;
 160
 161       if (c == ':')
 162         break;
 163
 164       if (!(g_ascii_isalnum(c) ||
 165             c == '+' ||
 166             c == '-' ||
 167             c == '.'))
 168         return NULL;
 169     }
 170
 171   return g_strndup (uri, p - uri - 1);
 172 }
 173
 174 #define SUB_DELIM_CHARS  "!$&'()*+,;="
 175
 176 static gboolean
 177 is_valid (char c, const char *reserved_chars_allowed)
 178 {
 179   if (g_ascii_isalnum (c) ||
 180       c == '-' ||
 181       c == '.' ||
 182       c == '_' ||
 183       c == '~')
 184     return TRUE;
 185
 186   if (reserved_chars_allowed &&
 187       strchr (reserved_chars_allowed, c) != NULL)
 188     return TRUE;
 189
 190   return FALSE;
 191 }
 192
 193 static gboolean
 194 gunichar_ok (gunichar c)
 195 {
 196   return
 197     (c != (gunichar) -2) &&
 198     (c != (gunichar) -1);
 199 }
 200
 201 /**
 202  * g_string_append_uri_escaped:
 203  * @string: a #GString to append to.
 204  * @unescaped: the input C string of unescaped URI data.
 205  * @reserved_chars_allowed: a string of reserve characters allowed to be used.
 206  * @allow_utf8: set %TRUE if the return value may include UTF8 characters.
 207  *
 208  * Returns a #GString with the escaped URI appended.
 209  *
 210  **/
 211 GString *
 212 g_string_append_uri_escaped (GString *string,
 213                              const char *unescaped,
 214                              const char *reserved_chars_allowed,
 215                              gboolean allow_utf8)
 216 {
 217   unsigned char c;
 218   const char *end;
 219   static const gchar hex[16] = "0123456789ABCDEF";
 220
 221   g_return_val_if_fail (string != NULL, NULL);
 222   g_return_val_if_fail (unescaped != NULL, NULL);
 223
 224   end = unescaped + strlen (unescaped);
 225
 226   while ((c = *unescaped) != 0)
 227     {
 228       if (c >= 0x80 && allow_utf8 &&
 229           gunichar_ok (g_utf8_get_char_validated (unescaped, end - unescaped)))
 230         {
 231           int len = g_utf8_skip [c];
 232           g_string_append_len (string, unescaped, len);
 233           unescaped += len;
 234         }
 235       else if (is_valid (c, reserved_chars_allowed))
 236         {
 237           g_string_append_c (string, c);
 238           unescaped++;
 239         }
 240       else
 241         {
 242           g_string_append_c (string, '%');
 243           g_string_append_c (string, hex[((guchar)c) >> 4]);
 244           g_string_append_c (string, hex[((guchar)c) & 0xf]);
 245           unescaped++;
 246         }
 247     }
 248
 249   return string;
 250 }
 251
 252 /**
 253  * g_uri_escape_string:
 254  * @unescaped: the unescaped input string.
 255  * @reserved_chars_allowed: a string of reserve characters allowed to be used.
 256  * @allow_utf8: set to %TRUE if string can include UTF8 characters.
 257  *
 258  * Returns an escaped version of @unescaped.
 259  *
 260  * The returned string should be freed when no longer needed.
 261  **/
 262 char *
 263 g_uri_escape_string (const char *unescaped,
 264                      const char  *reserved_chars_allowed,
 265                      gboolean     allow_utf8)
 266 {
 267   GString *s;
 268
 269   g_return_val_if_fail (unescaped != NULL, NULL);
 270
 271   s = g_string_sized_new (strlen (unescaped) + 10);
 272
 273   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
 274
 275   return g_string_free (s, FALSE);
 276 }