3 * encode.c - string conversion routines (mostly for compatibility with
6 * Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org>
7 * Copyright (C) 2009 Karel Zak <kzak@redhat.com>
9 * This file may be redistributed under the terms of the
10 * GNU Lesser General Public License.
/*
 * Extra characters that blkid_safe_string() hands to replace_chars() as its
 * whitelist, in addition to the set hard-coded in is_whitelisted(). The list
 * contains ' ', which is the character the whitespace test in replace_chars()
 * looks for in the whitelist.
 */
23 #define UDEV_ALLOWED_CHARS_INPUT "/ $%?,"
25 /* count of characters used to encode one unicode char */
/*
 * Classifies a UTF-8 sequence by its lead byte, str[0]; no other byte of
 * str is read in the lines shown. The masks below match the bit prefixes
 * 110xxxxx, 1110xxxx, 11110xxx, 111110xx and 1111110x, in that order.
 *
 * NOTE(review): the value returned for each match is not visible in this
 * excerpt -- presumably the sequence length in bytes; confirm against the
 * full file.
 */
26 static int utf8_encoded_expected_len(const char *str)
28 unsigned char c = (unsigned char)str[0];
/* lead byte 110xxxxx */
32 if ((c & 0xe0) == 0xc0)
/* lead byte 1110xxxx */
34 if ((c & 0xf0) == 0xe0)
/* lead byte 11110xxx */
36 if ((c & 0xf8) == 0xf0)
/* lead byte 111110xx */
38 if ((c & 0xfc) == 0xf8)
/* lead byte 1111110x */
40 if ((c & 0xfe) == 0xfc)
45 /* decode one unicode char */
/*
 * Builds an integer code point from the UTF-8 sequence starting at str.
 * The sequence length comes from utf8_encoded_expected_len(); the payload
 * bits of the lead byte are extracted with one of the masks below, and the
 * low six bits of every following byte are merged in.
 *
 * NOTE(review): which mask is applied for which length, the shift that
 * presumably precedes each OR, and the return statements are not visible in
 * this excerpt -- confirm against the full file.
 */
46 static int utf8_encoded_to_unichar(const char *str)
52 len = utf8_encoded_expected_len(str);
/*
 * Lead-byte payload masks: 5, 4, 3, 2 and 1 low bits respectively. The first
 * one lacks the (int) cast the others carry; integer promotion makes the
 * result the same.
 */
57 unichar = str[0] & 0x1f;
60 unichar = (int)str[0] & 0x0f;
63 unichar = (int)str[0] & 0x07;
66 unichar = (int)str[0] & 0x03;
69 unichar = (int)str[0] & 0x01;
/* remaining bytes of the sequence, indices 1 .. len-1 */
75 for (i = 1; i < len; i++) {
/* true when the byte does not have the 10xxxxxx form */
76 if (((int)str[i] & 0xc0) != 0x80)
/* merge the six payload bits of this byte */
79 unichar |= (int)str[i] & 0x3f;
85 /* expected size used to encode one unicode char */
/*
 * Compares unichar against ascending upper bounds. The bounds visible here
 * are 2^16, 2^21 and 2^26.
 *
 * NOTE(review): the lower thresholds and every return value are not visible
 * in this excerpt -- presumably each bound maps to the number of bytes needed
 * to encode values below it; confirm against the full file.
 */
86 static int utf8_unichar_to_encoded_len(int unichar)
/* below 2^16 */
92 if (unichar < 0x10000)
/* below 2^21 */
94 if (unichar < 0x200000)
/* below 2^26 */
96 if (unichar < 0x4000000)
101 /* check if unicode char has a valid numeric range */
/*
 * Tests unichar against four numeric ranges, annotated below.
 *
 * NOTE(review): the return statements are not visible in this excerpt. The
 * caller in utf8_encoded_valid_unichar() tests the result with '!', so a zero
 * result presumably means "out of range" -- confirm.
 */
102 static int utf8_unichar_valid_range(int unichar)
/* anything above 0x10ffff */
104 if (unichar > 0x10ffff)
/* clearing the low 11 bits leaves 0xd800: matches 0xd800 .. 0xdfff */
106 if ((unichar & 0xfffff800) == 0xd800)
/* both bounds exclusive: matches 0xfdd0 .. 0xfdef */
108 if ((unichar > 0xfdcf) && (unichar < 0xfdf0))
/* low 16 bits all set: matches 0xffff, 0x1ffff, 0x2ffff, ... */
110 if ((unichar & 0xffff) == 0xffff)
115 /* validate one encoded unicode char and return its length */
/*
 * Runs the sequence starting at str through the checks below, in order:
 * expected length from the lead byte, high bit set on every byte of the
 * sequence, decode, re-encoded length equal to the actual length, and numeric
 * range. blkid_encode_string() uses the result as a byte count for memcpy().
 *
 * NOTE(review): what is returned when a check fails, and the handling of the
 * len value between the visible lines, are not shown in this excerpt.
 */
116 static int utf8_encoded_valid_unichar(const char *str)
122 len = utf8_encoded_expected_len(str);
130 /* check if expected encoded chars are available */
131 for (i = 0; i < len; i++)
/*
 * A NUL terminator has its high bit clear, so this condition is also true
 * when the string ends before len bytes have been seen.
 */
132 if ((str[i] & 0x80) != 0x80)
135 unichar = utf8_encoded_to_unichar(str);
137 /* check if encoded length matches encoded value */
/* a mismatch means the sequence length differs from what the value needs */
138 if (utf8_unichar_to_encoded_len(unichar) != len)
141 /* check if value has valid range */
142 if (!utf8_unichar_valid_range(unichar))
/*
 * Processes at most len bytes of str: trailing whitespace is excluded by
 * shrinking len, leading whitespace is skipped by advancing i, and runs of
 * whitespace inside the string are collapsed (to a single '_' according to
 * the comment below). blkid_safe_string() passes its output buffer as 'to'.
 *
 * NOTE(review): the writes to 'to', the enclosing copy loop and the return
 * value are not visible in this excerpt.
 *
 * NOTE(review): every isspace() call here receives a plain char. The C
 * standard requires the argument to be representable as unsigned char (or
 * EOF), so bytes with the high bit set should be cast to unsigned char first
 * on platforms where char is signed.
 */
148 static int replace_whitespace(const char *str, char *to, size_t len)
152 /* strip trailing whitespace */
/* clamp len to the actual string length, never reading past len bytes */
153 len = strnlen(str, len);
154 while (len && isspace(str[len-1]))
157 /* strip leading whitespace */
/*
 * NOTE(review): str[i] is read before the bound i < len is tested; swapping
 * the operands would keep the read inside the bound (relevant when len is 0).
 */
159 while (isspace(str[i]) && (i < len))
164 /* substitute multiple whitespace with a single '_' */
165 if (isspace(str[i])) {
/*
 * No explicit bound on i in this loop. Presumably it relies on the trailing
 * strip above having left a non-whitespace byte at str[len-1] -- confirm
 * against the enclosing loop condition, which is not shown.
 */
166 while (isspace(str[i]))
/*
 * Tests whether c is an ASCII digit, an ASCII letter, one of "#+-.:=@_", or
 * -- when white is not NULL -- a character contained in white.
 *
 * strchr() also matches the terminating NUL of its first argument, so
 * c == '\0' satisfies the condition. The visible callers loop only while the
 * current character is not '\0', so they never pass it.
 *
 * NOTE(review): the return statements are not visible in this excerpt.
 */
176 static int is_whitelisted(char c, const char *white)
178 if ((c >= '0' && c <= '9') ||
179 (c >= 'A' && c <= 'Z') ||
180 (c >= 'a' && c <= 'z') ||
181 strchr("#+-.:=@_", c) != NULL ||
182 (white != NULL && strchr(white, c) != NULL))
187 /* allow chars in whitelist, plain ascii, hex-escaping and valid utf8 */
/*
 * Walks str up to its NUL terminator and classifies each position with the
 * tests below, in this order: whitelisted character, "\x" escape prefix,
 * valid UTF-8 sequence, whitespace (only when ' ' is in white), and finally
 * everything else. white may be NULL; both uses of it are guarded.
 *
 * NOTE(review): the action taken in each branch, how i advances and the
 * return value are not visible in this excerpt. str is non-const and
 * blkid_safe_string() passes its output buffer, so the string is presumably
 * rewritten in place -- confirm.
 */
188 static int replace_chars(char *str, const char *white)
193 while (str[i] != '\0') {
196 if (is_whitelisted(str[i], white)) {
201 /* accept hex encoding */
/*
 * str[i+1] is only read when str[i] is '\\', i.e. not the terminator, so the
 * read is at worst the terminating NUL.
 */
202 if (str[i] == '\\' && str[i+1] == 'x') {
207 /* accept valid utf8 */
208 len = utf8_encoded_valid_unichar(&str[i]);
214 /* if space is allowed, replace whitespace with ordinary space */
/*
 * NOTE(review): isspace() receives a plain char here; bytes with the high bit
 * set that failed the UTF-8 test can reach this line, so a cast to unsigned
 * char is advisable.
 */
215 if (isspace(str[i]) && white != NULL && strchr(white, ' ') != NULL) {
222 /* everything else is replaced with '_' */
231 * blkid_encode_string:
232 * @str: input string to be encoded
233 * @str_enc: output string to store the encoded input string
234 * @len: maximum size of the output string, which may be
235 * four times as long as the input string
237 * Encode all potentially unsafe characters of a string to the
238 * corresponding hex value prefixed by '\x'.
240 * Returns: 0 if the entire string was copied, non-zero otherwise.
/*
 * Implementation notes, limited to the lines visible in this excerpt:
 *  - a NULL str or str_enc is detected up front;
 *  - i indexes the input and j the output;
 *  - a sequence accepted by utf8_encoded_valid_unichar() is copied verbatim
 *    with memcpy() after checking that seqlen bytes still fit in the output;
 *  - a backslash, or any byte not accepted by is_whitelisted() with no extra
 *    whitelist, is written as a four-character "\xNN" escape.
 *
 * NOTE(review): the condition on seqlen that selects the memcpy() path, the
 * updates of i and j, the plain-copy branch, termination of str_enc and the
 * return statements are not shown here.
 */
242 int blkid_encode_string(const char *str, char *str_enc, size_t len)
246 if (str == NULL || str_enc == NULL)
249 for (i = 0, j = 0; str[i] != '\0'; i++) {
252 seqlen = utf8_encoded_valid_unichar(&str[i]);
/* remaining output space is smaller than the sequence */
254 if (len-j < (size_t)seqlen)
256 memcpy(&str_enc[j], &str[i], seqlen);
/* the backslash itself is escaped as well */
259 } else if (str[i] == '\\' || !is_whitelisted(str[i], NULL)) {
/*
 * The cast to unsigned char keeps the value in 0..255, so "%02x" always
 * yields exactly two hex digits; sprintf() then writes four characters plus a
 * terminating NUL.
 * NOTE(review): sprintf() itself is unbounded and a preceding space check is
 * not visible in this excerpt -- confirm one exists, or use snprintf().
 */
262 sprintf(&str_enc[j], "\\x%02x", (unsigned char) str[i]);
284 * @str_safe: output string
285 * @len: size of output string
287 * Allows plain ascii, hex-escaping and valid utf8. Replaces all whitespaces
/*
 * Implementation notes, limited to the lines visible in this excerpt: the
 * input is first passed through replace_whitespace() into str_safe, then
 * str_safe is passed through replace_chars() with UDEV_ALLOWED_CHARS_INPUT as
 * the extra whitelist.
 *
 * NOTE(review): no NULL check on str or str_safe is visible here, unlike in
 * blkid_encode_string(); the results of both helpers are not used in the
 * lines shown, and the return statement is not visible.
 */
290 int blkid_safe_string(const char *str, char *str_safe, size_t len)
292 replace_whitespace(str, str_safe, len);
293 replace_chars(str_safe, UDEV_ALLOWED_CHARS_INPUT);