glib/gurifuncs.c

   1 /* GIO - GLib Input, Output and Streaming Library
   2  *
   3  * Copyright (C) 2006-2007 Red Hat, Inc.
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2 of the License, or (at your option) any later version.
   9  *
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU Lesser General
  16  * Public License along with this library; if not, write to the
  17  * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
  18  * Boston, MA 02111-1307, USA.
  19  *
  20  * Author: Alexander Larsson <alexl@redhat.com>
  21  */
  22
  23 #include "gurifuncs.h"
  24
  25 #include <glib/gstrfuncs.h>
  26 #include <glib/gmessages.h>
  27 #include <glib/gstring.h>
  28 #include <glib/gmem.h>
  29
  30 #include <string.h>
  31
  32 #include "config.h"
  33 #include "galias.h"
  34
/**
 * SECTION:gurifuncs
 * @short_description: URI Functions
 *
 * Functions for manipulating Uniform Resource Identifiers (URIs) as
 * defined by <ulink url="http://www.ietf.org/rfc/rfc3986.txt">
 * RFC 3986</ulink>. It is highly recommended that you read and
 * understand RFC 3986 before using this API.
 */
  44
  45 static int
  46 unescape_character (const char *scanner)
  47 {
  48   int first_digit;
  49   int second_digit;
  50
  51   first_digit = g_ascii_xdigit_value (*scanner++);
  52   if (first_digit < 0)
  53     return -1;
  54
  55   second_digit = g_ascii_xdigit_value (*scanner++);
  56   if (second_digit < 0)
  57     return -1;
  58
  59   return (first_digit << 4) | second_digit;
  60 }
  61
  62 /**
  63  * g_uri_unescape_segment:
  64  * @escaped_string: a string.
  65  * @escaped_string_end: a string.
  66  * @illegal_characters: an optional string of illegal characters not to be allowed.
  67  *
  68  * Unescapes a segment of an escaped string.
  69  *
  70  * If any of the characters in @illegal_characters or the character zero appears
  71  * as an escaped character in @escaped_string then that is an error and %NULL
  72  * will be returned. This is useful it you want to avoid for instance having a
  73  * slash being expanded in an escaped path element, which might confuse pathname
  74  * handling.
  75  *
  76  * Returns: an unescaped version of @escaped_string or %NULL on error.
  77  * The returned string should be freed when no longer needed.
  78  *
  79  * Since: 2.16
  80  **/
  81 char *
  82 g_uri_unescape_segment (const char *escaped_string,
  83                         const char *escaped_string_end,
  84                         const char *illegal_characters)
  85 {
  86   const char *in;
  87   char *out, *result;
  88   gint character;
  89
  90   if (escaped_string == NULL)
  91     return NULL;
  92
  93   if (escaped_string_end == NULL)
  94     escaped_string_end = escaped_string + strlen (escaped_string);
  95
  96   result = g_malloc (escaped_string_end - escaped_string + 1);
  97
  98   out = result;
  99   for (in = escaped_string; in < escaped_string_end; in++)
 100     {
 101       character = *in;
 102
 103       if (*in == '%')
 104         {
 105           in++;
 106
 107           if (escaped_string_end - in < 2)
 108             {
 109               /* Invalid escaped char (to short) */
 110               g_free (result);
 111               return NULL;
 112             }
 113
 114           character = unescape_character (in);
 115
 116           /* Check for an illegal character. We consider '\0' illegal here. */
 117           if (character <= 0 ||
 118               (illegal_characters != NULL &&
 119                strchr (illegal_characters, (char)character) != NULL))
 120             {
 121               g_free (result);
 122               return NULL;
 123             }
 124
 125           in++; /* The other char will be eaten in the loop header */
 126         }
 127       *out++ = (char)character;
 128     }
 129
 130   *out = '\0';
 131
 132   return result;
 133 }
 134
 135 /**
 136  * g_uri_unescape_string:
 137  * @escaped_string: an escaped string to be unescaped.
 138  * @illegal_characters: an optional string of illegal characters not to be allowed.
 139  *
 140  * Unescapes a whole escaped string.
 141  *
 142  * If any of the characters in @illegal_characters or the character zero appears
 143  * as an escaped character in @escaped_string then that is an error and %NULL
 144  * will be returned. This is useful it you want to avoid for instance having a
 145  * slash being expanded in an escaped path element, which might confuse pathname
 146  * handling.
 147  *
 148  * Returns: an unescaped version of @escaped_string. The returned string
 149  * should be freed when no longer needed.
 150  *
 151  * Since: 2.16
 152  **/
 153 char *
 154 g_uri_unescape_string (const char *escaped_string,
 155                        const char *illegal_characters)
 156 {
 157   return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
 158 }
 159
 160 /**
 161  * g_uri_parse_scheme:
 162  * @uri: a valid URI.
 163  *
 164  * Gets the scheme portion of a URI string. RFC 3986 decodes the scheme as:
 165  * <programlisting>
 166  * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 167  * </programlisting>
 168  * Common schemes include "file", "http", "svn+ssh", etc.
 169  *
 170  * Returns: The "Scheme" component of the URI, or %NULL on error.
 171  * The returned string should be freed when no longer needed.
 172  *
 173  * Since: 2.16
 174  **/
 175 char *
 176 g_uri_parse_scheme (const char  *uri)
 177 {
 178   const char *p;
 179   char c;
 180
 181   g_return_val_if_fail (uri != NULL, NULL);
 182
 183   /* From RFC 3986 Decodes:
 184    * URI         = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
 185    */
 186
 187   p = uri;
 188
 189   /* Decode scheme:
 190      scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
 191   */
 192
 193   if (!g_ascii_isalpha (*p))
 194     return NULL;
 195
 196   while (1)
 197     {
 198       c = *p++;
 199
 200       if (c == ':')
 201         break;
 202
 203       if (!(g_ascii_isalnum(c) ||
 204             c == '+' ||
 205             c == '-' ||
 206             c == '.'))
 207         return NULL;
 208     }
 209
 210   return g_strndup (uri, p - uri - 1);
 211 }
 212
 213 /**
 214  * g_uri_escape_string:
 215  * @unescaped: the unescaped input string.
 216  * @reserved_chars_allowed: a string of reserved characters that are
 217  *      allowed to be used.
 218  * @allow_utf8: %TRUE if the result can include UTF-8 characters.
 219  *
 220  * Escapes a string for use in a URI.
 221  *
 222  * Normally all characters that are not "unreserved" (i.e. ASCII alphanumerical
 223  * characters plus dash, dot, underscore and tilde) are escaped.
 224  * But if you specify characters in @reserved_chars_allowed they are not
 225  * escaped. This is useful for the "reserved" characters in the URI
 226  * specification, since those are allowed unescaped in some portions of
 227  * a URI.
 228  *
 229  * Returns: an escaped version of @unescaped. The returned string should be
 230  * freed when no longer needed.
 231  *
 232  * Since: 2.16
 233  **/
 234 char *
 235 g_uri_escape_string (const char *unescaped,
 236                      const char  *reserved_chars_allowed,
 237                      gboolean     allow_utf8)
 238 {
 239   GString *s;
 240
 241   g_return_val_if_fail (unescaped != NULL, NULL);
 242
 243   s = g_string_sized_new (strlen (unescaped) + 10);
 244
 245   g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
 246
 247   return g_string_free (s, FALSE);
 248 }
 249
 250 #define __G_URI_FUNCS_C__
 251 #include "galiasdef.c"