1 /* GIO - GLib Input, Output and Streaming Library
3 * Copyright (C) 2006-2007 Red Hat, Inc.
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General
16 * Public License along with this library; if not, write to the
17 * Free Software Foundation, Inc., 59 Temple Place, Suite 330,
18 * Boston, MA 02111-1307, USA.
20 * Author: Alexander Larsson <alexl@redhat.com>
24 #include "gurifuncs.h"
28 unescape_character (const char *scanner)
33 first_digit = g_ascii_xdigit_value (*scanner++);
37 second_digit = g_ascii_xdigit_value (*scanner++);
41 return (first_digit << 4) | second_digit;
45 * g_uri_unescape_segment:
46 * @escaped_string: the escaped string to unescape.
47 * @escaped_string_end: pointer to the end of @escaped_string, or %NULL to use its full length.
48 * @illegal_characters: a string of illegal characters not to be allowed, or %NULL.
50 * Returns: an unescaped version of @escaped_string or %NULL on error.
51 * The returned string should be freed when no longer needed.
54 g_uri_unescape_segment (const char *escaped_string,
55 const char *escaped_string_end,
56 const char *illegal_characters)
62 if (escaped_string == NULL)
65 if (escaped_string_end == NULL)
66 escaped_string_end = escaped_string + strlen (escaped_string);
68 result = g_malloc (escaped_string_end - escaped_string + 1);
71 for (in = escaped_string; in < escaped_string_end; in++)
79 if (escaped_string_end - in < 2)
81 /* Invalid escaped char (too short) */
86 character = unescape_character (in);
88 /* Check for an illegal character. We consider '\0' illegal here. */
90 (illegal_characters != NULL &&
91 strchr (illegal_characters, (char)character) != NULL))
97 in++; /* The other char will be eaten in the loop header */
99 *out++ = (char)character;
108 * g_uri_unescape_string:
109 * @escaped_string: an escaped string to be unescaped.
110 * @illegal_characters: a string of illegal characters not to be allowed, or %NULL.
112 * Returns: an unescaped version of @escaped_string, or %NULL on error.
114 * The returned string should be freed when no longer needed.
118 g_uri_unescape_string (const char *escaped_string,
119 const char *illegal_characters)
121 return g_uri_unescape_segment (escaped_string, NULL, illegal_characters);
128 * Returns: The "Scheme" component of the URI, or %NULL on error.
129 * RFC 3986 defines the URI syntax as:
130 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
131 * Popular schemes include "file", "http", "svn", etc.
133 * The returned string should be freed when no longer needed.
137 g_uri_get_scheme (const char *uri)
142 g_return_val_if_fail (uri != NULL, NULL);
144 /* From RFC 3986 Decodes:
145 * URI = scheme ":" hier-part [ "?" query ] [ "#" fragment ]
151 scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
154 if (!g_ascii_isalpha (*p))
164 if (!(g_ascii_isalnum(c) ||
171 return g_strndup (uri, p - uri - 1);
174 #define SUB_DELIM_CHARS "!$&'()*+,;="
177 is_valid (char c, const char *reserved_chars_allowed)
179 if (g_ascii_isalnum (c) ||
186 if (reserved_chars_allowed &&
187 strchr (reserved_chars_allowed, c) != NULL)
194 gunichar_ok (gunichar c)
197 (c != (gunichar) -2) &&
198 (c != (gunichar) -1);
202 * g_string_append_uri_escaped:
203 * @string: a #GString to append to.
204 * @unescaped: the input C string of unescaped URI data.
205 * @reserved_chars_allowed: a string of reserved characters that may be left unescaped, or %NULL.
206 * @allow_utf8: set to %TRUE if the result may include UTF-8 characters.
208 * Returns: a #GString with the escaped URI appended.
212 g_string_append_uri_escaped (GString *string,
213 const char *unescaped,
214 const char *reserved_chars_allowed,
219 static const gchar hex[16] = "0123456789ABCDEF";
221 g_return_val_if_fail (string != NULL, NULL);
222 g_return_val_if_fail (unescaped != NULL, NULL);
224 end = unescaped + strlen (unescaped);
226 while ((c = *unescaped) != 0)
228 if (c >= 0x80 && allow_utf8 &&
229 gunichar_ok (g_utf8_get_char_validated (unescaped, end - unescaped)))
231 int len = g_utf8_skip [c];
232 g_string_append_len (string, unescaped, len);
235 else if (is_valid (c, reserved_chars_allowed))
237 g_string_append_c (string, c);
242 g_string_append_c (string, '%');
243 g_string_append_c (string, hex[((guchar)c) >> 4]);
244 g_string_append_c (string, hex[((guchar)c) & 0xf]);
253 * g_uri_escape_string:
254 * @unescaped: the unescaped input string.
255 * @reserved_chars_allowed: a string of reserved characters that may be left unescaped, or %NULL.
256 * @allow_utf8: set to %TRUE if the result may include UTF-8 characters.
258 * Returns: an escaped version of @unescaped.
260 * The returned string should be freed when no longer needed.
263 g_uri_escape_string (const char *unescaped,
264 const char *reserved_chars_allowed,
269 g_return_val_if_fail (unescaped != NULL, NULL);
271 s = g_string_sized_new (strlen (unescaped) + 10);
273 g_string_append_uri_escaped (s, unescaped, reserved_chars_allowed, allow_utf8);
275 return g_string_free (s, FALSE);