1 /* qmark.c -- quote 'dangerous' filenames
3 Copyright (C) 2005, 2007, 2009, 2010, 2011 Free Software Foundation,
5 Derived from coreutils' ls.c:
6 Copyright (C) 85, 88, 90, 91, 1995-2005 Free Software Foundation, Inc.
8 This program is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or
11 (at your option) any later version.
13 This program is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
18 You should have received a copy of the GNU General Public License
19 along with this program. If not, see <http://www.gnu.org/licenses/>.
21 /* config.h must be included first. */
30 /* gnulib headers would go here if any needed to be included. */
33 #include "printquoted.h"
38 This comment, IN_CTYPE_DOMAIN and ISPRINT were borrowed from
39 coreutils at Sun Jun 5 21:17:40 2005 UTC.
43 "... Some ctype macros are valid only for character codes that
44 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
45 using /bin/cc or gcc but without giving an ansi option). So, all
46 ctype uses should be through macros like ISPRINT... If
47 STDC_HEADERS is defined, then autoconf has verified that the ctype
48 macros don't need to be guarded with references to isascii. ...
49 Defining isascii to 1 should let any compiler worth its salt
50 eliminate the && through constant folding."
54 "... Furthermore, isupper(c) etc. have an undefined result if c is
55 outside the range -1 <= c <= 255. One is tempted to write isupper(c)
56 with c being of type `char', but this is wrong if c is an 8-bit
57 character >= 128 which gets sign-extended to a negative value.
58 The macro ISUPPER protects against this as well."
60 (Actually that role of ISUPPER is now taken by to_uchar).
64 # define IN_CTYPE_DOMAIN(c) 1
66 # define IN_CTYPE_DOMAIN(c) isascii(c)
69 /* ISPRINT is defined in <sys/euc.h> on at least Solaris2.6 systems. */
71 #define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
77 /* Convert a possibly-signed character to an unsigned character. This is
78 * a bit safer than casting to unsigned char, since it catches some type
79 * errors that the cast doesn't.
81 * This code taken from coreutils' system.h header at
82 * Sun Jun 5 21:05:21 2005 UTC.
84 static inline unsigned char to_uchar (char ch)
91 unibyte_qmark_chars (char *buf, size_t len)
94 char const *plimit = buf + len;
98 if (! ISPRINT (to_uchar (*p)))
109 /* Scan BUF, replacing any dangerous-looking characters with question
110 * marks. This code is taken from the ls.c file in coreutils as at
111 * Sun Jun 5 20:51:54 2005 UTC.
113 * This function may shrink the buffer. Either way, the new length
117 qmark_chars (char *buf, size_t len)
121 return unibyte_qmark_chars (buf, len);
126 char const *plimit = buf + len;
132 case ' ': case '!': case '"': case '#': case '%':
133 case '&': case '\'': case '(': case ')': case '*':
134 case '+': case ',': case '-': case '.': case '/':
135 case '0': case '1': case '2': case '3': case '4':
136 case '5': case '6': case '7': case '8': case '9':
137 case ':': case ';': case '<': case '=': case '>':
139 case 'A': case 'B': case 'C': case 'D': case 'E':
140 case 'F': case 'G': case 'H': case 'I': case 'J':
141 case 'K': case 'L': case 'M': case 'N': case 'O':
142 case 'P': case 'Q': case 'R': case 'S': case 'T':
143 case 'U': case 'V': case 'W': case 'X': case 'Y':
145 case '[': case '\\': case ']': case '^': case '_':
146 case 'a': case 'b': case 'c': case 'd': case 'e':
147 case 'f': case 'g': case 'h': case 'i': case 'j':
148 case 'k': case 'l': case 'm': case 'n': case 'o':
149 case 'p': case 'q': case 'r': case 's': case 't':
150 case 'u': case 'v': case 'w': case 'x': case 'y':
151 case 'z': case '{': case '|': case '}': case '~':
152 /* These characters are printable ASCII characters. */
156 /* If we have a multibyte sequence, copy it until we
157 reach its end, replacing each non-printable multibyte
158 character with a single question mark. */
161 memset (&mbstate, 0, sizeof mbstate);
168 bytes = mbrtowc (&wc, p, plimit - p, &mbstate);
170 if (bytes == (size_t) -1)
172 /* An invalid multibyte sequence was
173 encountered. Skip one input byte, and
174 put a question mark. */
180 if (bytes == (size_t) -2)
182 /* An incomplete multibyte character
183 at the end. Replace it entirely with
191 /* A null wide character was encountered. */
197 /* A printable multibyte character.
199 for (; bytes > 0; --bytes)
204 /* An unprintable multibyte character.
205 Replace it entirely with a question
211 while (! mbsinit (&mbstate));
216 /* The buffer may have shrunk. */