2 * Copyright (C) 2009-2012 Erwin Waterlander
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice in the documentation and/or other materials provided with
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
24 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28 #if defined(D2U_UNICODE)
29 #if defined(WIN32) || defined(__CYGWIN__)
34 #if defined(__GLIBC__)
35 /* on glibc, canonicalize_file_name() broken prior to 2.4 (06-Mar-2006) */
36 # if __GNUC_PREREQ (2,4)
37 # define USE_CANONICALIZE_FILE_NAME 1
39 #elif defined(__CYGWIN__)
40 /* on cygwin, canonicalize_file_name() available since api 0/213 */
41 /* (1.7.0beta61, 25-Sep-09) */
42 # include <cygwin/version.h>
43 # if (CYGWIN_VERSION_DLL_COMBINED >= 213) && (CYGWIN_VERSION_DLL_MAJOR >= 1007)
44 # define USE_CANONICALIZE_FILE_NAME 1
49 /******************************************************************
51 * int symbolic_link(char *path)
53 * test if *path points to a file that exists and is a symbolic link
55 * returns 1 on success, 0 when it fails.
57 ******************************************************************/
58 int symbolic_link(char *path)
63 if (STAT(path, &buf) == 0)
65 if (S_ISLNK(buf.st_mode))
72 /******************************************************************
74 * int regfile(char *path, int allowSymlinks, CFlag *ipFlag, char *progname)
76 * test if *path points to a regular file (or is a symbolic link,
77 * if allowSymlinks != 0).
79 * returns 0 on success, -1 when it fails.
81 ******************************************************************/
82 int regfile(char *path, int allowSymlinks, CFlag *ipFlag, char *progname)
87 if (STAT(path, &buf) == 0)
90 fprintf(stderr, "%s: %s MODE 0%o ", progname, path, buf.st_mode);
92 if (S_ISSOCK(buf.st_mode))
93 fprintf(stderr, " (socket)");
96 if (S_ISLNK(buf.st_mode))
97 fprintf(stderr, " (symbolic link)");
99 if (S_ISREG(buf.st_mode))
100 fprintf(stderr, " (regular file)");
101 if (S_ISBLK(buf.st_mode))
102 fprintf(stderr, " (block device)");
103 if (S_ISDIR(buf.st_mode))
104 fprintf(stderr, " (directory)");
105 if (S_ISCHR(buf.st_mode))
106 fprintf(stderr, " (character device)");
107 if (S_ISFIFO(buf.st_mode))
108 fprintf(stderr, " (FIFO)");
109 fprintf(stderr, "\n");
111 if ((S_ISREG(buf.st_mode))
113 || (S_ISLNK(buf.st_mode) && allowSymlinks)
124 ipFlag->error = errno;
125 errstr = strerror(errno);
126 fprintf(stderr, "%s: %s: %s\n", progname, path, errstr);
132 /******************************************************************
134 * int regfile_target(char *path, CFlag *ipFlag, char *progname)
136 * test if *path points to a regular file (follow symbolic link)
138 * returns 0 on success, -1 when it fails.
140 ******************************************************************/
141 int regfile_target(char *path, CFlag *ipFlag, char *progname)
146 if (stat(path, &buf) == 0)
148 if (S_ISREG(buf.st_mode))
157 ipFlag->error = errno;
158 errstr = strerror(errno);
159 fprintf(stderr, "%s: %s: %s\n", progname, path, errstr);
165 void PrintBSDLicense(void)
167 fprintf(stderr, "%s", _("\
168 Redistribution and use in source and binary forms, with or without\n\
169 modification, are permitted provided that the following conditions\n\
171 1. Redistributions of source code must retain the above copyright\n\
172 notice, this list of conditions and the following disclaimer.\n\
173 2. Redistributions in binary form must reproduce the above copyright\n\
174 notice in the documentation and/or other materials provided with\n\
175 the distribution.\n\n\
177 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY\n\
178 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n\
179 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n\
180 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE\n\
181 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n\
182 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT\n\
183 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n\
184 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n\
185 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n\
186 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN\n\
187 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
191 void PrintUsage(char *progname)
195 Usage: %s [options] [file ...] [-n infile outfile ...]\n\
196 -ascii convert only line breaks (default)\n\
197 -iso conversion between DOS and ISO-8859-1 character set\n\
198 -1252 Use Windows code page 1252 (Western European)\n\
199 -437 Use DOS code page 437 (US) (default)\n\
200 -850 Use DOS code page 850 (Western European)\n\
201 -860 Use DOS code page 860 (Portuguese)\n\
202 -863 Use DOS code page 863 (French Canadian)\n\
203 -865 Use DOS code page 865 (Nordic)\n\
204 -7 Convert 8 bit characters to 7 bit space\n\
205 -c, --convmode conversion mode\n\
206 convmode ascii, 7bit, iso, mac, default to ascii\n\
207 -f, --force force conversion of binary files\n\
208 -h, --help give this help\n\
209 -k, --keepdate keep output file date\n\
210 -L, --license display software license\n\
211 -l, --newline add additional newline\n\
212 -m, --add-bom add UTF-8 Byte Order Mark\n\
213 -n, --newfile write to new file\n\
214 infile original file in new file mode\n\
215 outfile output file in new file mode\n\
216 -o, --oldfile write to old file\n\
217 file ... files to convert in old file mode\n\
218 -q, --quiet quiet mode, suppress all warnings\n\
219 always on in stdio mode\n\
220 -s, --safe skip binary files (default)\n"),
221 progname, VER_REVISION, VER_DATE, progname);
224 -F, --follow-symlink follow symbolic links and convert the targets\n\
225 -R, --replace-symlink replace symbolic links with converted files\n\
226 (original target files remain unchanged)\n\
227 -S, --skip-symlink keep symbolic links and targets unchanged (default)\n"));
230 -V, --version display version number\n"));
234 void PrintVersion(char *progname)
236 fprintf(stderr, "%s %s (%s)\n", progname, VER_REVISION, VER_DATE);
238 fprintf(stderr, "VER_AUTHOR: %s\n", VER_AUTHOR);
240 #if defined(__WATCOMC__) && defined(__I86__)
241 fprintf(stderr, "%s", _("DOS 16 bit version (WATCOMC).\n"));
242 #elif defined(__TURBOC__)
243 fprintf(stderr, "%s", _("DOS 16 bit version (TURBOC).\n"));
244 #elif defined(__WATCOMC__) && defined(__DOS__)
245 fprintf(stderr, "%s", _("DOS 32 bit version (WATCOMC).\n"));
247 fprintf(stderr, "%s", _("DOS 32 bit version (DJGPP).\n"));
248 #elif defined(__MSYS__)
249 fprintf(stderr, "%s", _("MSYS version.\n"));
250 #elif defined(__CYGWIN__)
251 fprintf(stderr, "%s", _("Cygwin version.\n"));
252 #elif defined(__WIN64__)
253 fprintf(stderr, "%s", _("Windows 64 bit version (MinGW-w64).\n"));
254 #elif defined(__WATCOMC__) && defined(__NT__)
255 fprintf(stderr, "%s", _("Windows 32 bit version (WATCOMC).\n"));
256 #elif defined(__WIN32__)
257 fprintf(stderr, "%s", _("Windows 32 bit version (MinGW).\n"));
258 #elif defined (__OS2__) /* OS/2 Warp */
259 fprintf(stderr, "%s", _("OS/2 version.\n"));
262 fprintf(stderr, "%s", _("With Unicode UTF-16 support.\n"));
264 fprintf(stderr, "%s", _("Without Unicode UTF-16 support.\n"));
267 fprintf(stderr, "%s", _("With native language support.\n"));
269 fprintf(stderr, "%s", "Without native language support.\n");
274 void PrintLocaledir(char *localedir)
276 fprintf(stderr, "LOCALEDIR: %s\n", localedir);
280 /* opens file of name ipFN in read only mode
281 * RetVal: NULL if failure
282 * file stream otherwise
284 FILE* OpenInFile(char *ipFN)
286 return (fopen(ipFN, R_CNTRL));
290 /* opens a stream on the already-open file descriptor fd in write only mode
291 * RetVal: NULL if failure
292 * file stream otherwise
294 FILE* OpenOutFile(int fd)
296 return (fdopen(fd, W_CNTRL));
299 #if defined(__TURBOC__) || defined(__MSYS__)
300 char *dirname(char *path)
304 if (( path == NULL) || (((ptr=strrchr(path,'/')) == NULL) && ((ptr=strrchr(path,'\\')) == NULL)) )
315 FILE* MakeTempFileFrom(const char *OutFN, char **fname_ret)
317 int MakeTempFileFrom(const char *OutFN, char **fname_ret)
320 char *cpy = strdup(OutFN);
322 size_t fname_len = 0;
323 char *fname_str = NULL;
338 fname_len = strlen(dir) + strlen("/d2utmpXXXXXX") + sizeof (char);
339 if (!(fname_str = malloc(fname_len)))
341 sprintf(fname_str, "%s%s", dir, "/d2utmpXXXXXX");
342 *fname_ret = fname_str;
347 name = mktemp(fname_str);
349 if ((fd = fopen(fname_str, W_CNTRL)) == NULL)
352 if ((fd = mkstemp(fname_str)) == -1)
368 /* Test if *lFN is the name of a symbolic link. If not, set *rFN equal
369 * to lFN, and return 0. If so, then use canonicalize_file_name or
370 * realpath to determine the pointed-to file; the resulting name is
371 * stored in newly allocated memory, *rFN is set to point to that value,
372 * and 1 is returned. On error, -1 is returned and errno is set as
375 * Note that if symbolic links are not supported, then 0 is always returned
378 * RetVal: 0 if success, and *lFN is not a symlink
379 * 1 if success, and *lFN is a symlink
382 int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, char *progname)
388 char *targetFN = NULL;
390 if (STAT(lFN, &StatBuf))
394 ipFlag->error = errno;
395 errstr = strerror(errno);
396 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
400 else if (S_ISLNK(StatBuf.st_mode))
402 #if USE_CANONICALIZE_FILE_NAME
403 targetFN = canonicalize_file_name(lFN);
408 errstr = strerror(errno);
409 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
420 /* Sigh. Use realpath, but realize that it has a fatal
421 * flaw: PATH_MAX isn't necessarily the maximum path
422 * length -- so realpath() might fail. */
423 targetFN = (char *) malloc(PATH_MAX * sizeof(char));
428 errstr = strerror(errno);
429 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
436 /* is there any platform with S_ISLNK that does not have realpath? */
437 char *rVal = realpath(lFN, targetFN);
442 errstr = strerror(errno);
443 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
455 #endif /* !USE_CANONICALIZE_FILE_NAME */
461 #endif /* !S_ISLNK */
465 FILE *read_bom (FILE *f, int *bomtype)
479 if ((bom[0] = fgetc(f)) == EOF)
485 if ((bom[0] != 0xff) && (bom[0] != 0xfe) && (bom[0] != 0xef))
491 if ((bom[1] = fgetc(f)) == EOF)
498 if ((bom[0] == 0xff) && (bom[1] == 0xfe)) /* UTF16-LE */
500 *bomtype = FILE_UTF16LE;
503 if ((bom[0] == 0xfe) && (bom[1] == 0xff)) /* UTF16-BE */
505 *bomtype = FILE_UTF16BE;
508 if ((bom[2] = fgetc(f)) == EOF)
516 if ((bom[0] == 0xef) && (bom[1] == 0xbb) && (bom[2]== 0xbf)) /* UTF-8 */
518 *bomtype = FILE_UTF8;
532 wint_t d2u_getwc(FILE *f, int bomtype)
537 if (((c_lead=fgetc(f)) == EOF) || ((c_trail=fgetc(f)) == EOF))
540 if (bomtype == FILE_UTF16LE) /* UTF16 little endian */
543 wc = (wint_t)(c_trail + c_lead) ;
544 } else { /* UTF16 big endian */
546 wc = (wint_t)(c_trail + c_lead) ;
551 wint_t d2u_ungetwc(wint_t wc, FILE *f, int bomtype)
555 if (bomtype == FILE_UTF16LE) /* UTF16 little endian */
557 c_trail = (int)(wc & 0xff00);
559 c_lead = (int)(wc & 0xff);
560 } else { /* UTF16 big endian */
561 c_lead = (int)(wc & 0xff00);
563 c_trail = (int)(wc & 0xff);
566 /* push back in reverse order */
567 if ((ungetc(c_trail,f) == EOF) || (ungetc(c_lead,f) == EOF))
572 /* Put wide character */
573 wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
576 static wchar_t lead, trail;
577 static wchar_t wstr[3];
580 if ((wc >= 0xd800) && (wc < 0xdc00))
582 /* fprintf(stderr, "UTF-16 lead %x\n",wc); */
583 lead = (wchar_t)wc; /* lead (high) surrogate */
586 if ((wc >= 0xdc00) && (wc < 0xe000))
588 /* fprintf(stderr, "UTF-16 trail %x\n",wc); */
589 trail = (wchar_t)wc; /* trail (low) surrogate */
590 #if defined(WIN32) || defined(__CYGWIN__)
591 /* On Windows (including Cygwin) wchar_t is 16 bit */
592 /* We cannot decode a UTF-16 surrogate pair, because it will
593 not fit in a 16 bit wchar_t. */
598 /* On Unix wchar_t is 32 bit */
599 /* When we don't decode the UTF-16 surrogate pair, wcstombs() does not
600 * produce the same UTF-8 as WideCharToMultiByte(). The UTF-8 output
601 * produced by wcstombs() is bigger, because it just translates the wide
602 * characters in the range 0xD800..0xDBFF individually to UTF-8 sequences
603 * (although these code points are reserved for use only as surrogate
604 * pairs in UTF-16). Probably because on Unix the size of wide char
605 * (wchar_t) is 32 bit, wcstombs assumes the encoding is UTF-32, and
606 * ignores UTF-16 surrogates altogether. Some smart viewers can still
607 * display this UTF-8 correctly (like Total Commander lister), however
608 * the UTF-8 is not readable by Windows Notepad (on Windows 7). When we
609 * decode the UTF-16 surrogate pairs ourselves the wcstombs() UTF-8
610 * output is identical to what WideCharToMultiByte() produces, and is
611 * readable by Notepad.
613 /* Decode UTF-16 surrogate pair */
615 wstr[0] += (lead & 0x03FF) << 10;
616 wstr[0] += (trail & 0x03FF);
620 wstr[0] = (wchar_t)wc;
624 #if defined(WIN32) || defined(__CYGWIN__)
625 /* On Windows we convert UTF-16 always to UTF-8 */
626 len = (size_t)(WideCharToMultiByte(CP_UTF8, 0, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1);
628 /* On Unix we convert UTF-16 to the locale encoding */
629 len = wcstombs(mbs, wstr, sizeof(mbs));
632 if ( len == (size_t)(-1) )
633 { /* Stop when there is a conversion error */
634 ipFlag->status |= UNICODE_CONVERSION_ERROR ;
637 for (i=0; i<len; i++)
639 if (fputc(mbs[i], f) == EOF)