glib/gurifuncs.c

   1 /* GIO - GLib Input, Output and Streaming Library
   2  *
   3  * Copyright (C) 2006-2007 Red Hat, Inc.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General
  16  * Public License along with this library; if not, write to the
  17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  18  * Boston, MA 02111-1307, USA.
  19  *
  20  * Author: Alexander Larsson <alexl@redhat.com>
  21  */
  22
  23 #include <config.h>
  24 #include "gurifuncs.h"
  25 #include "string.h"
  26
  27 #include "galias.h"
  28
  29 /**
  30  * SECTION:gurifuncs
  31  * @short_description: URI Functions
  32  *
 * Functions for manipulating Uniform Resource Identifiers (URIs) as
 * defined by RFC 3986. It is highly recommended that you read and
 * understand RFC 3986 before using this API. A copy of RFC 3986
 * can be found at <ulink url="http://www.ietf.org/rfc/rfc3986.txt"/>.
  37  **/
  38
  39 static int
  40 unescape_character (const char *scanner)
  41 {
  42   int first_digit;
  43   int second_digit;
  44
  45   first_digit = g_ascii_xdigit_value (*scanner++);
  46   if (first_digit < 0)
  47     return -1;
  48
  49   second_digit = g_ascii_xdigit_value (*scanner++);
  50   if (second_digit < 0)
  51     return -1;
  52
  53   return (first_digit << 4) | second_digit;
  54 }
  55
  56 /**
  57  * g_uri_unescape_segment:
  58  * @escaped_string: a string.
  59  * @escaped_string_end: a string.
  60  * @illegal_characters: an optional string of illegal characters not to be allowed.
  61  *
  62  * Unescapes a segment of an escaped string.
  63  *
  64  * If any of the characters in @illegal_characters or the character zero appears
  65  * as an escaped character in @escaped_string then that is an error and %NULL
  66  * will be returned. This is useful it you want to avoid for instance having a
  67  * slash being expanded in an escaped path element, which might confuse pathname
  68  * handling.
  69  *
  70  * Returns: an unescaped version of @escaped_string or %NULL on error.
  71  * The returned string should be freed when no longer needed.
  72  *
  73  * Since: 2.16
  74  **/
  75 char *
  76 g_uri_unescape_segment (const char *escaped_string,
  77                         const char *escaped_string_end,
  78                         const char *illegal_characters)
  79 {
  80   const char *in;
  81   char *out, *result;
  82   gint character;
  83
  84   if (escaped_string == NULL)
  85     return NULL;
  86
  87   if (escaped_string_end == NULL)
  88     escaped_string_end = escaped_string + strlen (escaped_string);
  89
  90   result = g_malloc (escaped_string_end - escaped_string + 1);
  91
  92   out = result;
  93   for (in = escaped_string; in < escaped_string_end; in++)
  94     {
  95       character = *in;
  96
  97       if (*in == '%')
  98         {
  99           in++;
 100
 101           if (escaped_string_end - in < 2)
 102             {
 103               /* Invalid escaped char (to short) */
 104               g_free (result);
 105               return NULL;
 106             }
 107
 108           character = unescape_character (in);
 109
 110           /* Check for an illegal character. We consider '\0' illegal here. */
 111           if (character <= 0 ||
 112               (illegal_characters != NULL &&
 113                strchr (illegal_characters, (char)character) != NULL))
 114             {
 115               g_free (result);
 116               return NULL;
 117             }
 118
 119           in++; /* The other char will be eaten in the loop header */
 120         }
 121       *out++ = (char)character;
 122     }
 123
 124   *out = '\0';
 125
 126   return result;
 127 }
 128
 129 /**
 130  * g_uri_unescape_string:
 131  * @escaped_string: an escaped string to be unescaped.
 132  * @illegal_characters: an optional string of illegal characters not to be allowed.
 133  *
 134  * Unescapes a whole escaped string.
 135  *
 136  * If any of the characters in @illegal_characters or the character zero appears
 137  * as an escaped character in @escaped_string then that is an error and %NULL
 138  * will be returned. This is useful it you want to avoid for instance having a
 139  * slash being expanded in an escaped path element, which might confuse pathname
 140  * handling.
 141  *
 142  * Returns: an unescaped version of @escaped_string. The returned string
 143  * should be freed when no longer needed.
 144  *
 145  * Since: 2.16
 146  **/
 147 char *
 148 g_uri_unescape_string (const char *escaped_string,
 149                        const char *illegal_characters)
 150 {
 151   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
 152 }
 153
 154 /**
 155  * g_uri_parse_scheme:
 156  * @uri: a valid URI.
 157  *
 158  * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
 159  * <programlisting>
 160  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 161  * </programlisting>
 162  * Common schemes include "file", "http", "svn+ssh", etc.
 163  *
 164  * Returns: The "Scheme" component of the URI, or %NULL on error.
 165  * The returned string should be freed when no longer needed.
 166  *
 167  * Since: 2.16
 168  **/
 169 char *
 170 g_uri_parse_scheme (const char  *uri)
 171 {
 172   const char *p;
 173   char c;
 174
 175   g_return_val_if_fail (uri != NULL, NULL);
 176
 177   /* From RFC 3986 Decodes:
 178    * URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 179    */
 180
 181   p = uri;
 182
 183   /* Decode scheme:
 184      scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 185   */
 186
 187   if (!g_ascii_isalpha (*p))
 188     return NULL;
 189
 190   while (1)
 191     {
 192       c = *p++;
 193
 194       if (c == ':')
 195         break;
 196
 197       if (!(g_ascii_isalnum(c) ||
 198             c == '+' ||
 199             c == '-' ||
 200             c == '.'))
 201         return NULL;
 202     }
 203
 204   return g_strndup (uri, p - uri - 1);
 205 }
 206
 207 /**
 208  * g_uri_escape_string:
 209  * @unescaped: the unescaped input string.
 210  * @reserved_chars_allowed: a string of reserved characters that are
 211  *      allowed to be used.
 212  * @allow_utf8: %TRUE if the result can include UTF-8 characters.
 213  *
 214  * Escapes a string for use in a URI.
 215  *
 216  * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
 217  * characters plus dash, dot, underscore and tilde) are escaped.
 218  * But if you specify characters in @reserved_chars_allowed they are not
 219  * escaped. This is useful for the "reserved" characters in the URI
 220  * specification, since those are allowed unescaped in some portions of
 221  * a URI.
 222  *
 223  * Returns: an escaped version of @unescaped. The returned string should be
 224  * freed when no longer needed.
 225  *
 226  * Since: 2.16
 227  **/
 228 char *
 229 g_uri_escape_string (const char *unescaped,
 230                      const char  *reserved_chars_allowed,
 231                      gboolean     allow_utf8)
 232 {
 233   GString *s;
 234
 235   g_return_val_if_fail (unescaped != NULL, NULL);
 236
 237   s = g_string_sized_new (strlen (unescaped) + 10);
 238
 239   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
 240
 241   return g_string_free (s, FALSE);
 242 }
 243
 244 #define __G_URI_FUNCS_C__
 245 #include "galiasdef.c"