2 * Copyright (C) 2009-2014 Erwin Waterlander
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice in the documentation and/or other materials provided with
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
19 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
20 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
21 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
22 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
23 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
24 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 #if defined(D2U_UNICODE)
32 #if defined(_WIN32) || defined(__CYGWIN__)
35 #if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */
36 # include <langinfo.h>
/* Decide whether canonicalize_file_name() may be used to resolve symbolic
 * links. When USE_CANONICALIZE_FILE_NAME is left undefined the code falls
 * back to realpath(). */
#if defined(__GLIBC__)
/* on glibc, canonicalize_file_name() broken prior to 2.4 (06-Mar-2006) */
/* The requirement is on the C library version, so test it with
 * __GLIBC_PREREQ; __GNUC_PREREQ would test the compiler version instead. */
# if defined(__GLIBC_PREREQ)
#  if __GLIBC_PREREQ (2,4)
#   define USE_CANONICALIZE_FILE_NAME 1
#  endif
# endif
#elif defined(__CYGWIN__)
/* on cygwin, canonicalize_file_name() available since api 0/213 */
/* (1.7.0beta61, 25-Sep-09) */
# include <cygwin/version.h>
# if (CYGWIN_VERSION_DLL_COMBINED >= 213) && (CYGWIN_VERSION_DLL_MAJOR >= 1007)
#  define USE_CANONICALIZE_FILE_NAME 1
# endif
#endif
/******************************************************************
 *
 * int symbolic_link(const char *path)
 *
 * test if *path points to a file that exists and is a symbolic link
 *
 * returns 1 when path is a symbolic link, 0 otherwise (including
 * when the path cannot be examined).
 *
 ******************************************************************/

#if (defined(_WIN32) && !defined(__CYGWIN__))

int symbolic_link(const char *path)
{
   DWORD attrs;

   attrs = GetFileAttributes(path);

   /* The attributes could not be read: report "not a symbolic link". */
   if (attrs == INVALID_FILE_ATTRIBUTES)
      return(0);

   /* Any reparse point is treated as a symbolic link. */
   return ((attrs & FILE_ATTRIBUTE_REPARSE_POINT) != 0);
}

#else
int symbolic_link(const char *path)
{
#ifdef S_ISLNK
   struct stat buf;

   /* STAT() is the project's stat wrapper; it has to examine the link
    * itself rather than its target for S_ISLNK() to ever be true. */
   if (STAT(path, &buf) == 0) {
      if (S_ISLNK(buf.st_mode))
         return(1);
   }
#endif
   /* No symbolic link support on this platform, STAT() failed, or the
    * path is not a link. */
   return(0);
}
#endif
94 /******************************************************************
96 * int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname)
98 * test if *path points to a regular file (or is a symbolic link,
99 * if allowSymlinks != 0).
101 * returns 0 on success, -1 when it fails.
103 ******************************************************************/
/* Examine path with STAT() and test whether it is a regular file; a
 * symbolic link is accepted as well when allowSymlinks is nonzero.
 *
 * path          file name to examine
 * allowSymlinks nonzero to accept symbolic links too
 * ipFlag        verbose is read; error is written on STAT() failure
 * progname      prefix used for messages on stderr
 *
 * When STAT() fails and ipFlag->verbose is set, errno is stored in
 * ipFlag->error and a line with progname, path and the strerror() text is
 * written to stderr. With verbose unset the failure is not recorded.
 *
 * NOTE(review): braces, return statements, local declarations and any
 * preprocessor guards of this function are not visible in this view.
 * The file-type dump below is presumably a debug-only section -- confirm
 * against the complete file. */
104 int regfile(char *path, int allowSymlinks, CFlag *ipFlag, const char *progname)
109 if (STAT(path, &buf) == 0) {
/* Diagnostic dump of the raw mode bits followed by every matching type. */
111 fprintf(stderr, "%s: %s MODE 0%o ", progname, path, buf.st_mode);
113 if (S_ISSOCK(buf.st_mode))
114 fprintf(stderr, " (socket)");
117 if (S_ISLNK(buf.st_mode))
118 fprintf(stderr, " (symbolic link)");
120 if (S_ISREG(buf.st_mode))
121 fprintf(stderr, " (regular file)");
123 if (S_ISBLK(buf.st_mode))
124 fprintf(stderr, " (block device)");
126 if (S_ISDIR(buf.st_mode))
127 fprintf(stderr, " (directory)");
128 if (S_ISCHR(buf.st_mode))
129 fprintf(stderr, " (character device)");
130 if (S_ISFIFO(buf.st_mode))
131 fprintf(stderr, " (FIFO)");
132 fprintf(stderr, "\n");
/* The actual acceptance test: regular file, or symlink if allowed. */
134 if ((S_ISREG(buf.st_mode))
136 || (S_ISLNK(buf.st_mode) && allowSymlinks)
/* STAT() failure path: report only in verbose mode. */
144 if (ipFlag->verbose) {
145 ipFlag->error = errno;
146 errstr = strerror(errno);
147 fprintf(stderr, "%s: %s: %s\n", progname, path, errstr);
153 /******************************************************************
155 * int regfile_target(char *path)
157 * test if *path points to a regular file (follow symbolic link)
159 * returns 0 on success, -1 when it fails.
161 ******************************************************************/
162 int regfile_target(char *path, CFlag *ipFlag, const char *progname)
167 if (stat(path, &buf) == 0) {
168 if (S_ISREG(buf.st_mode))
174 if (ipFlag->verbose) {
175 ipFlag->error = errno;
176 errstr = strerror(errno);
177 fprintf(stderr, "%s: %s: %s\n", progname, path, errstr);
/* Holds the text of the BSD license as one string literal that is
 * continued across source lines with backslash-newline.
 * Takes no arguments and returns nothing.
 * NOTE(review): the statement that emits the text, the function braces
 * and a few lines of the literal are not visible in this view; presumably
 * the text is printed to stdout -- confirm against the complete file. */
183 void PrintBSDLicense(void)
186 Redistribution and use in source and binary forms, with or without\n\
187 modification, are permitted provided that the following conditions\n\
189 1. Redistributions of source code must retain the above copyright\n\
190 notice, this list of conditions and the following disclaimer.\n\
191 2. Redistributions in binary form must reproduce the above copyright\n\
192 notice in the documentation and/or other materials provided with\n\
193 the distribution.\n\n\
196 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY\n\
197 EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\n\
198 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR\n\
199 PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE\n\
200 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR\n\
201 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT\n\
202 OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR\n\
203 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,\n\
204 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE\n\
205 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN\n\
206 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.\n\
/* Tell whether the program runs in the "to Unix" direction.
 *
 * progname  program name to test; must not be NULL
 *
 * RetVal: 1 if progname is exactly "dos2unix" or "mac2unix"
 *         0 otherwise
 */
int is_dos2unix(const char *progname)
{
  /* An exact match is required: a longer name such as "dos2unix.exe"
   * does not qualify. */
  if ((strcmp(progname, "dos2unix") == 0) || (strcmp(progname, "mac2unix") == 0))
    return 1;
  else
    return 0;
}
/* Print the command line help to stdout: a usage line that contains
 * progname, followed by one line (or a few lines) per option. All texts
 * go through the _() translation macro.
 *
 * progname  program name; shown in the usage line and passed to
 *           is_dos2unix() to choose which BOM option is marked as default
 *
 * NOTE(review): braces, else lines and most preprocessor guards are not
 * visible in this view, so it cannot be told from here which options are
 * compiled in conditionally. */
218 void PrintUsage(const char *progname)
220 printf(_("Usage: %s [options] [file ...] [-n infile outfile ...]\n"), progname);
221 printf(_(" -ascii convert only line breaks (default)\n"));
222 printf(_(" -iso conversion between DOS and ISO-8859-1 character set\n"));
223 printf(_(" -1252 use Windows code page 1252 (Western European)\n"));
224 printf(_(" -437 use DOS code page 437 (US) (default)\n"));
225 printf(_(" -850 use DOS code page 850 (Western European)\n"));
226 printf(_(" -860 use DOS code page 860 (Portuguese)\n"));
227 printf(_(" -863 use DOS code page 863 (French Canadian)\n"));
228 printf(_(" -865 use DOS code page 865 (Nordic)\n"));
229 printf(_(" -7 convert 8 bit characters to 7 bit space\n"));
/* Two wordings of the keep-bom line; the one tagged as default is the
 * one not used for dos2unix/mac2unix. */
230 if (is_dos2unix(progname))
231 printf(_(" -b, --keep-bom keep Byte Order Mark\n"));
233 printf(_(" -b, --keep-bom keep Byte Order Mark (default)\n"));
234 printf(_(" -c, --convmode conversion mode\n\
235 convmode ascii, 7bit, iso, mac, default to ascii\n"));
236 printf(_(" -f, --force force conversion of binary files\n"));
237 printf(_(" -h, --help display this help text\n"));
238 printf(_(" -k, --keepdate keep output file date\n"));
239 printf(_(" -L, --license display software license\n"));
240 printf(_(" -l, --newline add additional newline\n"));
241 printf(_(" -m, --add-bom add Byte Order Mark (default UTF-8)\n"));
242 printf(_(" -n, --newfile write to new file\n\
243 infile original file in new-file mode\n\
244 outfile output file in new-file mode\n"));
245 printf(_(" -o, --oldfile write to old file (default)\n\
246 file ... files to convert in old-file mode\n"));
247 printf(_(" -q, --quiet quiet mode, suppress all warnings\n"));
/* Mirror image of the keep-bom choice above for the remove-bom line. */
248 if (is_dos2unix(progname))
249 printf(_(" -r, --remove-bom remove Byte Order Mark (default)\n"));
251 printf(_(" -r, --remove-bom remove Byte Order Mark\n"));
252 printf(_(" -s, --safe skip binary files (default)\n"));
254 printf(_(" -u, --keep-utf16 keep UTF-16 encoding\n"));
255 printf(_(" -ul, --assume-utf16le assume that the input format is UTF-16LE\n"));
256 printf(_(" -ub, --assume-utf16be assume that the input format is UTF-16BE\n"));
258 printf(_(" -v, --verbose verbose operation\n"));
260 printf(_(" -F, --follow-symlink follow symbolic links and convert the targets\n"));
/* The remaining symlink options are listed only where symbolic links can
 * be detected (S_ISLNK available, or native Windows). */
262 #if defined(S_ISLNK) || (defined(_WIN32) && !defined(__CYGWIN__))
263 printf(_(" -R, --replace-symlink replace symbolic links with converted files\n\
264 (original target files remain unchanged)\n"));
265 printf(_(" -S, --skip-symlink keep symbolic links and targets unchanged (default)\n"));
267 printf(_(" -V, --version display version number\n"));
270 #define MINGW32_W64 1
/* Print version and build information to stdout: program name with
 * VER_REVISION and VER_DATE, a line naming the platform/compiler the
 * binary was built for (selected at compile time by the #if/#elif chain),
 * whether Unicode UTF-16 and native language support are present, and
 * the project URL.
 *
 * progname   program name shown on the first line
 * localedir  printed on the LOCALEDIR line
 *
 * NOTE(review): the preprocessor lines that choose between the With and
 * Without messages, and the guards around the VER_AUTHOR and LOCALEDIR
 * lines, are not visible in this view. */
272 void PrintVersion(const char *progname, const char *localedir)
274 printf("%s %s (%s)\n", progname, VER_REVISION, VER_DATE);
276 printf("VER_AUTHOR: %s\n", VER_AUTHOR);
278 #if defined(__WATCOMC__) && defined(__I86__)
279 printf("%s", _("DOS 16 bit version (WATCOMC).\n"));
280 #elif defined(__TURBOC__) && defined(__MSDOS__)
281 printf("%s", _("DOS 16 bit version (TURBOC).\n"));
282 #elif defined(__WATCOMC__) && defined(__DOS__)
283 printf("%s", _("DOS 32 bit version (WATCOMC).\n"));
284 #elif defined(__DJGPP__)
285 printf("%s", _("DOS 32 bit version (DJGPP).\n"));
286 #elif defined(__MSYS__)
287 printf("%s", _("MSYS version.\n"));
288 #elif defined(__CYGWIN__)
289 printf("%s", _("Cygwin version.\n"));
290 #elif defined(__WIN64__) && defined(__MINGW64__)
291 printf("%s", _("Windows 64 bit version (MinGW-w64).\n"));
292 #elif defined(__WATCOMC__) && defined(__NT__)
293 printf("%s", _("Windows 32 bit version (WATCOMC).\n"));
294 #elif defined(_WIN32) && defined(__MINGW32__) && (D2U_COMPILER == MINGW32_W64)
295 printf("%s", _("Windows 32 bit version (MinGW-w64).\n"));
296 #elif defined(_WIN32) && defined(__MINGW32__)
297 printf("%s", _("Windows 32 bit version (MinGW).\n"));
298 #elif defined(_WIN64) && defined(_MSC_VER)
299 printf(_("Windows 64 bit version (MSVC %d).\n"),_MSC_VER);
300 #elif defined(_WIN32) && defined(_MSC_VER)
301 printf(_("Windows 32 bit version (MSVC %d).\n"),_MSC_VER);
302 #elif defined (__OS2__) && defined(__WATCOMC__) /* OS/2 Warp */
303 printf("%s", _("OS/2 version (WATCOMC).\n"));
304 #elif defined (__OS2__) && defined(__EMX__) /* OS/2 Warp */
305 printf("%s", _("OS/2 version (EMX).\n"));
308 printf("%s", _("With Unicode UTF-16 support.\n"));
310 printf("%s", _("Without Unicode UTF-16 support.\n"));
313 printf("%s", _("With native language support.\n"));
315 printf("%s", "Without native language support.\n");
318 printf("LOCALEDIR: %s\n", localedir);
320 printf("http://waterlan.home.xs4all.nl/dos2unix.html\n");
324 /* opens file of name ipFN in read only mode
325 * RetVal: NULL if failure
326 * file stream otherwise
328 FILE* OpenInFile(char *ipFN)
330 return (fopen(ipFN, R_CNTRL));
334 /* opens file of name ipFN in write only mode
335 * RetVal: NULL if failure
336 * file stream otherwise
338 FILE* OpenOutFile(int fd)
340 return (fdopen(fd, W_CNTRL));
343 #if defined(__TURBOC__) || defined(__MSYS__) || defined(_MSC_VER)
/* Minimal dirname() replacement for platforms that lack one.
 *
 * path  file name; it is modified in place (cut at its last separator)
 *
 * Both '/' and '\\' are accepted as directory separators, and the path is
 * cut at whichever of them occurs last, so mixed-separator paths such as
 * "dir/sub\\file" yield "dir/sub".
 *
 * RetVal: "." (a string literal, do not modify or free) if path is NULL
 *             or contains no separator
 *         path itself, truncated before its last separator, otherwise
 */
char *dirname(char *path)
{
  char *slash;
  char *backslash;
  char *ptr;

  if (path == NULL)
    return ".";

  slash = strrchr(path,'/');
  backslash = strrchr(path,'\\');

  /* Pick the right-most separator of either kind. */
  if (slash == NULL)
    ptr = backslash;
  else if (backslash == NULL)
    ptr = slash;
  else
    ptr = (slash > backslash) ? slash : backslash;

  if (ptr == NULL)
    return ".";

  *ptr = '\0';
  return(path);
}
/* Create a temporary file whose name is built from OutFN: the template
 * /d2utmpXXXXXX is appended to a directory string dir, and the result is
 * handed to mkstemp(), or to mktemp() followed by fopen().
 *
 * OutFN      name of the intended output file; it is duplicated with
 *            strdup() before use
 * fname_ret  receives the allocated name of the temporary file
 *
 * The function exists in two variants that differ in return type: one
 * returns a FILE pointer (mktemp + fopen), the other an int descriptor
 * (mkstemp, failure detected as -1).
 *
 * NOTE(review): the preprocessor lines selecting the variant, the line
 * that computes dir, the failure handling and the return statements are
 * not visible in this view; presumably dir is the directory part of
 * OutFN so the temporary file lands next to the output file -- confirm. */
358 FILE* MakeTempFileFrom(const char *OutFN, char **fname_ret)
360 int MakeTempFileFrom(const char *OutFN, char **fname_ret)
363 char *cpy = strdup(OutFN);
365 size_t fname_len = 0;
366 char *fname_str = NULL;
/* Buffer size: directory + template + one byte for the terminator, so
 * the sprintf() below cannot overflow. */
381 fname_len = strlen(dir) + strlen("/d2utmpXXXXXX") + sizeof (char);
382 if (!(fname_str = malloc(fname_len)))
384 sprintf(fname_str, "%s%s", dir, "/d2utmpXXXXXX");
385 *fname_ret = fname_str;
390 name = mktemp(fname_str);
392 if ((fd = fopen(fname_str, W_CNTRL)) == NULL)
395 if ((fd = mkstemp(fname_str)) == -1)
411 /* Test if *lFN is the name of a symbolic link. If not, set *rFN equal
412 * to lFN, and return 0. If so, then use canonicalize_file_name or
413 * realpath to determine the pointed-to file; the resulting name is
414 * stored in newly allocated memory, *rFN is set to point to that value,
415 * and 1 is returned. On error, -1 is returned and errno is set as
418 * Note that if symbolic links are not supported, then 0 is always returned
421 * RetVal: 0 if success, and *lFN is not a symlink
422 * 1 if success, and *lFN is a symlink
/* Resolve lFN when it is a symbolic link.
 *
 * lFN       name to examine with STAT()
 * rFN       receives the resolved name
 * ipFlag    verbose is read; error receives errno when a step fails in
 *           verbose mode
 * progname  prefix for messages on stderr
 *
 * A link is resolved with canonicalize_file_name() when
 * USE_CANONICALIZE_FILE_NAME is set, otherwise with realpath() into a
 * buffer of PATH_MAX bytes obtained from malloc(). Each failing step
 * (STAT, canonicalize_file_name, malloc, realpath) is reported the same
 * way, and only when ipFlag->verbose is set.
 *
 * NOTE(review): return statements, the assignments to *rFN, the cleanup
 * of targetFN and the S_ISLNK guard are not visible in this view. */
425 int ResolveSymbolicLink(char *lFN, char **rFN, CFlag *ipFlag, const char *progname)
431 char *targetFN = NULL;
433 if (STAT(lFN, &StatBuf)) {
434 if (ipFlag->verbose) {
435 ipFlag->error = errno;
436 errstr = strerror(errno);
437 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
441 else if (S_ISLNK(StatBuf.st_mode)) {
442 #if USE_CANONICALIZE_FILE_NAME
443 targetFN = canonicalize_file_name(lFN);
445 if (ipFlag->verbose) {
446 ipFlag->error = errno;
447 errstr = strerror(errno);
448 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
457 /* Sigh. Use realpath, but realize that it has a fatal
458 * flaw: PATH_MAX isn't necessarily the maximum path
459 * length -- so realpath() might fail. */
460 targetFN = (char *) malloc(PATH_MAX * sizeof(char));
462 if (ipFlag->verbose) {
463 ipFlag->error = errno;
464 errstr = strerror(errno);
465 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
470 /* is there any platform with S_ISLNK that does not have realpath? */
471 char *rVal = realpath(lFN, targetFN);
473 if (ipFlag->verbose) {
474 ipFlag->error = errno;
475 errstr = strerror(errno);
476 fprintf(stderr, "%s: %s: %s\n", progname, lFN, errstr);
486 #endif /* !USE_CANONICALIZE_FILE_NAME */
492 #endif /* !S_ISLNK */
/* Read up to three bytes from the start of f and classify a Byte Order
 * Mark:
 *   ff fe     sets *bomtype to FILE_UTF16LE
 *   fe ff     sets *bomtype to FILE_UTF16BE
 *   ef bb bf  sets *bomtype to FILE_UTF8
 * Reading stops early at end of file, or when the first byte cannot start
 * any of these marks.
 *
 * f        input stream
 * bomtype  receives the detected type
 *
 * NOTE(review): the handling of the no-BOM and end-of-file cases (value
 * stored in *bomtype, pushing the bytes back onto the stream) and the
 * return statements are not visible in this view. */
496 FILE *read_bom (FILE *f, int *bomtype)
509 if ((bom[0] = fgetc(f)) == EOF) {
514 if ((bom[0] != 0xff) && (bom[0] != 0xfe) && (bom[0] != 0xef)) {
519 if ((bom[1] = fgetc(f)) == EOF) {
525 if ((bom[0] == 0xff) && (bom[1] == 0xfe)) { /* UTF16-LE */
526 *bomtype = FILE_UTF16LE;
529 if ((bom[0] == 0xfe) && (bom[1] == 0xff)) { /* UTF16-BE */
530 *bomtype = FILE_UTF16BE;
533 if ((bom[2] = fgetc(f)) == EOF) {
540 if ((bom[0] == 0xef) && (bom[1] == 0xbb) && (bom[2]== 0xbf)) { /* UTF-8 */
541 *bomtype = FILE_UTF8;
/* Write a Byte Order Mark to f.
 *
 * f         output stream
 * ipFlag    keep_utf16, bomtype and verbose are read
 * progname  prefix for messages on stderr
 *
 * With ipFlag->keep_utf16 set the mark is chosen by ipFlag->bomtype:
 * FF FE for FILE_UTF16LE, FE FF for FILE_UTF16BE, and EF BB BF in a third
 * case. Without keep_utf16 the UTF-8 mark EF BB BF is written. When
 * ipFlag->verbose is greater than 1 the chosen mark is announced on
 * stderr.
 *
 * NOTE(review): the label of the third switch case, the break statements
 * and the return statement are not visible in this view. */
553 FILE *write_bom (FILE *f, CFlag *ipFlag, const char *progname)
555 if (ipFlag->keep_utf16)
557 switch (ipFlag->bomtype) {
558 case FILE_UTF16LE: /* UTF-16 Little Endian */
559 fprintf(f, "%s", "\xFF\xFE");
560 if (ipFlag->verbose > 1) {
561 fprintf(stderr, "%s: ", progname);
562 fprintf(stderr, _("Writing %s BOM.\n"), "UTF-16LE");
565 case FILE_UTF16BE: /* UTF-16 Big Endian */
566 fprintf(f, "%s", "\xFE\xFF");
567 if (ipFlag->verbose > 1) {
568 fprintf(stderr, "%s: ", progname);
569 fprintf(stderr, _("Writing %s BOM.\n"), "UTF-16BE");
573 fprintf(f, "%s", "\xEF\xBB\xBF");
574 if (ipFlag->verbose > 1) {
575 fprintf(stderr, "%s: ", progname);
576 fprintf(stderr, _("Writing %s BOM.\n"), "UTF-8");
581 fprintf(f, "%s", "\xEF\xBB\xBF");
582 if (ipFlag->verbose > 1)
584 fprintf(stderr, "%s: ", progname);
585 fprintf(stderr, _("Writing %s BOM.\n"), "UTF-8");
591 void print_bom (const int bomtype, const char *filename, const char *progname)
594 case FILE_UTF16LE: /* UTF-16 Little Endian */
595 fprintf(stderr, "%s: ", progname);
596 fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, "UTF-16LE");
598 case FILE_UTF16BE: /* UTF-16 Big Endian */
599 fprintf(stderr, "%s: ", progname);
600 fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, "UTF-16BE");
602 case FILE_UTF8: /* UTF-8 */
603 fprintf(stderr, "%s: ", progname);
604 fprintf(stderr, _("Input file %s has %s BOM.\n"), filename, "UTF-8");
/* Inspect the start of the input for a Byte Order Mark and decide whether
 * the conversion can go ahead.
 *
 * InF       input stream; passed through read_bom()
 * TempF     output stream; receives a BOM via write_bom() when requested
 * ipFlag    options in; bomtype, status and error out
 * ipInFN    input file name, used in messages only
 * progname  prefix for messages on stderr
 *
 * Steps visible here:
 *  - with verbose greater than 1, announce an assumed UTF-16 encoding;
 *  - read_bom() stores the detected type in ipFlag->bomtype;
 *  - a FILE_MBS result is overridden by CONVMODE_UTF16LE/BE;
 *  - on Unix and Cygwin, UTF-16 input without keep_utf16 is refused when
 *    the locale codeset is not UTF-8 (LOCALE_NOT_UTF8);
 *  - outside Windows and Cygwin it is refused when wchar_t is smaller
 *    than 4 bytes (WCHAR_T_TOO_SMALL);
 *  - if nothing failed, a BOM is written when add_bom is set, or when
 *    keep_bom is set and the input had one.
 *
 * NOTE(review): the declaration and assignments of RetVal and the return
 * statement are not visible in this view; presumably nonzero means the
 * file must be skipped -- confirm against the complete file. */
611 int check_unicode(FILE *InF, FILE *TempF, CFlag *ipFlag, const char *ipInFN, const char *progname)
616 if (ipFlag->verbose > 1) {
617 if (ipFlag->ConvMode == CONVMODE_UTF16LE) {
618 fprintf(stderr, "%s: ", progname);
619 fprintf(stderr, _("Assuming UTF-16LE encoding.\n") );
621 if (ipFlag->ConvMode == CONVMODE_UTF16BE) {
622 fprintf(stderr, "%s: ", progname);
623 fprintf(stderr, _("Assuming UTF-16BE encoding.\n") );
627 InF = read_bom(InF, &ipFlag->bomtype);
628 if (ipFlag->verbose > 1)
629 print_bom(ipFlag->bomtype, ipInFN, progname);
/* No BOM found: fall back to the encoding the user asked to assume. */
631 if ((ipFlag->bomtype == FILE_MBS) && (ipFlag->ConvMode == CONVMODE_UTF16LE))
632 ipFlag->bomtype = FILE_UTF16LE;
633 if ((ipFlag->bomtype == FILE_MBS) && (ipFlag->ConvMode == CONVMODE_UTF16BE))
634 ipFlag->bomtype = FILE_UTF16BE;
638 #if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */
639 if (!ipFlag->keep_utf16 && ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE))) {
640 if (strcmp(nl_langinfo(CODESET), "UTF-8") != 0) {
641 /* Don't convert UTF-16 files when the locale encoding is not UTF-8
642 * to prevent loss of characters. */
643 ipFlag->status |= LOCALE_NOT_UTF8 ;
644 if (!ipFlag->error) ipFlag->error = 1;
649 #if !defined(_WIN32) && !defined(__CYGWIN__) /* Not Windows or Cygwin */
650 if (!ipFlag->keep_utf16 && ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE))) {
651 if (sizeof(wchar_t) < 4) {
652 /* A decoded UTF-16 surrogate pair must fit in a wchar_t */
653 ipFlag->status |= WCHAR_T_TOO_SMALL ;
654 if (!ipFlag->error) ipFlag->error = 1;
661 if ((!RetVal) && ((ipFlag->add_bom) || ((ipFlag->keep_bom) && (ipFlag->bomtype > 0))))
662 write_bom(TempF, ipFlag, progname);
667 /* convert file ipInFN and write to file ipOutFN
668 * RetVal: 0 if success
/* Convert file ipInFN and write the result to file ipOutFN, going through
 * a temporary file that is renamed onto the output at the end.
 *
 * ipInFN    input file name
 * ipOutFN   output file name
 * ipFlag    options in (Follow, NewFile, KeepDate, verbose, ...); status,
 *           error and bomtype out
 * progname  prefix for messages on stderr
 * Convert   conversion routine for ordinary input
 * ConvertW  conversion routine used when the input is UTF-16
 *
 * Stages visible here, in order:
 *  1. Skip checks: output is a symbolic link and ipFlag->Follow is zero;
 *     input is not a regular file or link; a link target is not a
 *     regular file. Each sets a bit in ipFlag->status.
 *  2. stat() the input to obtain mode, owner and time stamps.
 *  3. Create the temporary file with MakeTempFileFrom(ipOutFN, ...), open
 *     the input, and wrap the temporary file in a stream.
 *  4. check_unicode(), then Convert() or ConvertW() depending on
 *     ipFlag->bomtype.
 *  5. Close both streams.
 *  6. chmod() the temporary file: the original mode in old-file mode,
 *     otherwise the original mode with the process umask applied.
 *  7. In old-file mode chown() it to the original owner and group.
 *  8. With ipFlag->KeepDate, copy access and modification times.
 *  9. On any error unlink() the temporary file.
 * 10. With SYMLINK_FOLLOW resolve the output link, then unlink() the
 *     target name and rename() the temporary file onto it.
 *
 * In most failure branches errno is copied to ipFlag->error and the
 * message is printed only when ipFlag->verbose is set.
 *
 * NOTE(review): braces, return statements, assignments to RetVal, several
 * local declarations and all #ifdef/#else/#endif lines are not visible in
 * this view. Adjacent near-duplicate statements below are presumably
 * alternative branches of such conditionals -- confirm against the
 * complete file. */
671 int ConvertNewFile(char *ipInFN, char *ipOutFN, CFlag *ipFlag, const char *progname,
672 int (*Convert)(FILE*, FILE*, CFlag *, const char *)
674 , int (*ConvertW)(FILE*, FILE*, CFlag *, const char *)
684 struct utimbuf UTimeBuf;
693 char *TargetFN = NULL;
694 int ResolveSymlinkResult = 0;
698 /* Test if output file is a symbolic link */
699 if (symbolic_link(ipOutFN) && !ipFlag->Follow) {
700 ipFlag->status |= OUTPUTFILE_SYMLINK ;
701 /* Not a failure, skipping input file according spec. (keep symbolic link unchanged) */
705 /* Test if input file is a regular file or symbolic link */
706 if (regfile(ipInFN, 1, ipFlag, progname)) {
707 ipFlag->status |= NO_REGFILE ;
708 /* Not a failure, skipping non-regular input file according spec. */
712 /* Test if input file target is a regular file */
713 if (symbolic_link(ipInFN) && regfile_target(ipInFN, ipFlag,progname)) {
714 ipFlag->status |= INPUT_TARGET_NO_REGFILE ;
715 /* Not a failure, skipping non-regular input file according spec. */
719 /* Test if output file target is a regular file */
720 if (symbolic_link(ipOutFN) && (ipFlag->Follow == SYMLINK_FOLLOW) && regfile_target(ipOutFN, ipFlag,progname)) {
721 ipFlag->status |= OUTPUT_TARGET_NO_REGFILE ;
722 /* Failure, input is regular, cannot produce output. */
723 if (!ipFlag->error) ipFlag->error = 1;
727 /* retrieve ipInFN file date stamp */
728 if (stat(ipInFN, &StatBuf)) {
729 if (ipFlag->verbose) {
730 ipFlag->error = errno;
731 errstr = strerror(errno);
732 fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr);
/* Two forms of the same call: one for a FILE pointer result, one for an
 * int descriptor result. */
738 if((fd = MakeTempFileFrom(ipOutFN, &TempPath))==NULL) {
740 if((fd = MakeTempFileFrom (ipOutFN, &TempPath)) < 0) {
742 if (ipFlag->verbose) {
743 ipFlag->error = errno;
744 errstr = strerror(errno);
745 fprintf(stderr, "%s: ", progname);
746 fprintf(stderr, _("Failed to open temporary output file: %s\n"), errstr);
752 fprintf(stderr, "%s: ", progname);
753 fprintf(stderr, _("using %s as temporary file\n"), TempPath);
756 /* can open in file? */
758 InF=OpenInFile(ipInFN);
760 ipFlag->error = errno;
761 errstr = strerror(errno);
762 fprintf(stderr, "%s: %s: %s\n", progname, ipInFN, errstr);
767 /* can open output file? */
768 if ((!RetVal) && (InF)) {
770 if ((TempF=fd) == NULL) {
772 if ((TempF=OpenOutFile(fd)) == NULL) {
773 ipFlag->error = errno;
774 errstr = strerror(errno);
775 fprintf(stderr, "%s: %s\n", progname, errstr);
784 if (check_unicode(InF, TempF, ipFlag, ipInFN, progname))
787 /* conversion successful? */
789 if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) {
790 if ((!RetVal) && (ConvertW(InF, TempF, ipFlag, progname)))
792 if (ipFlag->status & UNICODE_CONVERSION_ERROR) {
793 if (!ipFlag->error) ipFlag->error = 1;
797 if ((!RetVal) && (Convert(InF, TempF, ipFlag, progname)))
801 if ((!RetVal) && (Convert(InF, TempF, ipFlag, progname)))
805 /* can close in file? */
806 if ((InF) && (fclose(InF) == EOF))
809 /* can close output file? */
811 if (fclose(TempF) == EOF) {
812 if (ipFlag->verbose) {
813 ipFlag->error = errno;
814 errstr = strerror(errno);
815 fprintf(stderr, "%s: ", progname);
816 fprintf(stderr, _("Failed to write to temporary output file %s: %s\n"), TempPath, errstr);
833 if (ipFlag->NewFile == 0) { /* old-file mode */
834 RetVal = chmod (TempPath, StatBuf.st_mode); /* set original permissions */
/* umask() can only be read by setting it, hence the set-and-restore. */
836 mask = umask(0); /* get process's umask */
837 umask(mask); /* set umask back to original */
838 RetVal = chmod(TempPath, StatBuf.st_mode & ~mask); /* set original permissions, minus umask */
842 if (ipFlag->verbose) {
843 ipFlag->error = errno;
844 errstr = strerror(errno);
845 fprintf(stderr, "%s: ", progname);
846 fprintf(stderr, _("Failed to change the permissions of temporary output file %s: %s\n"), TempPath, errstr);
853 if (!RetVal && (ipFlag->NewFile == 0)) { /* old-file mode */
854 /* Change owner and group of the temporary output file to the original file's uid and gid. */
855 /* Required when a different user (e.g. root) has write permission on the original file. */
856 /* Make sure that the original owner can still access the file. */
857 if (chown(TempPath, StatBuf.st_uid, StatBuf.st_gid)) {
858 if (ipFlag->verbose) {
859 ipFlag->error = errno;
860 errstr = strerror(errno);
861 fprintf(stderr, "%s: ", progname);
862 fprintf(stderr, _("Failed to change the owner and group of temporary output file %s: %s\n"), TempPath, errstr);
869 if ((!RetVal) && (ipFlag->KeepDate))
871 UTimeBuf.actime = StatBuf.st_atime;
872 UTimeBuf.modtime = StatBuf.st_mtime;
873 /* can change output file time to in file time? */
874 if (utime(TempPath, &UTimeBuf) == -1) {
875 if (ipFlag->verbose) {
876 ipFlag->error = errno;
877 errstr = strerror(errno);
878 fprintf(stderr, "%s: %s: %s\n", progname, TempPath, errstr);
884 /* any error? cleanup the temp file */
885 if (RetVal && (TempPath != NULL)) {
/* A temporary file that is already gone (ENOENT) is not an error. */
886 if (unlink(TempPath) && (errno != ENOENT)) {
887 if (ipFlag->verbose) {
888 ipFlag->error = errno;
889 errstr = strerror(errno);
890 fprintf(stderr, "%s: %s: %s\n", progname, TempPath, errstr);
896 /* If output file is a symbolic link, optional resolve the link and modify */
897 /* the target, instead of removing the link and creating a new regular file */
899 if (symbolic_link(ipOutFN) && !RetVal) {
900 ResolveSymlinkResult = 0; /* indicates that TargetFN need not be freed */
901 if (ipFlag->Follow == SYMLINK_FOLLOW) {
902 ResolveSymlinkResult = ResolveSymbolicLink(ipOutFN, &TargetFN, ipFlag, progname);
903 if (ResolveSymlinkResult < 0) {
904 if (ipFlag->verbose) {
905 fprintf(stderr, "%s: ", progname);
906 fprintf(stderr, _("problems resolving symbolic link '%s'\n"), ipOutFN);
907 fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
914 /* can rename temporary file to output file? */
917 if (unlink(TargetFN) && (errno != ENOENT)) {
918 if (ipFlag->verbose) {
919 ipFlag->error = errno;
920 errstr = strerror(errno);
921 fprintf(stderr, "%s: %s: %s\n", progname, TargetFN, errstr);
926 if (rename(TempPath, TargetFN) == -1) {
927 if (ipFlag->verbose) {
928 ipFlag->error = errno;
929 errstr = strerror(errno);
930 fprintf(stderr, "%s: ", progname);
931 fprintf(stderr, _("problems renaming '%s' to '%s': %s\n"), TempPath, TargetFN, errstr);
933 if (ResolveSymlinkResult > 0)
934 fprintf(stderr, _(" which is the target of symbolic link '%s'\n"), ipOutFN);
936 fprintf(stderr, _(" output file remains in '%s'\n"), TempPath);
941 if (ResolveSymlinkResult > 0)
948 /* convert stdin and write to stdout
949 * RetVal: 0 if success
/* Convert standard input and write the result to standard output.
 *
 * ipFlag    options in; KeepDate is forced to 0 here since there is no
 *           output file whose date could be kept
 * progname  prefix for messages on stderr
 * Convert   conversion routine for ordinary input
 * ConvertW  conversion routine used when ipFlag->bomtype is FILE_UTF16LE
 *           or FILE_UTF16BE
 *
 * On DOS, Windows, Cygwin and OS/2 both streams are switched to binary
 * mode first so that the C runtime does not translate line endings.
 * check_unicode() then runs on stdin/stdout, and the value returned by
 * the selected conversion routine is returned to the caller.
 *
 * NOTE(review): the statement executed when check_unicode() fails and the
 * preprocessor guards around the ConvertW branch are not visible in this
 * view. */
952 int ConvertStdio(CFlag *ipFlag, const char *progname,
953 int (*Convert)(FILE*, FILE*, CFlag *, const char *)
955 , int (*ConvertW)(FILE*, FILE*, CFlag *, const char *)
960 ipFlag->KeepDate = 0;
962 #if defined(_WIN32) && !defined(__CYGWIN__)
964 /* stdin and stdout are by default text streams. We need
965 * to set them to binary mode. Otherwise an LF will
966 * automatically be converted to CR-LF on DOS/Windows.
969 /* POSIX 'setmode' was deprecated by MicroSoft since
970 * Visual C++ 2005. Use ISO C++ conformant '_setmode' instead. */
972 _setmode(_fileno(stdout), _O_BINARY);
973 _setmode(_fileno(stdin), _O_BINARY);
974 #elif defined(__MSDOS__) || defined(__CYGWIN__) || defined(__OS2__)
975 setmode(fileno(stdout), O_BINARY);
976 setmode(fileno(stdin), O_BINARY);
979 if (check_unicode(stdin, stdout, ipFlag, "stdin", progname))
983 if ((ipFlag->bomtype == FILE_UTF16LE) || (ipFlag->bomtype == FILE_UTF16BE)) {
984 return ConvertW(stdin, stdout, ipFlag, progname);
986 return Convert(stdin, stdout, ipFlag, progname);
989 return Convert(stdin, stdout, ipFlag, progname);
993 void print_messages_stdio(const CFlag *pFlag, const char *progname)
995 if (pFlag->status & BINARY_FILE) {
996 fprintf(stderr,"%s: ",progname);
997 fprintf(stderr, _("Skipping binary file %s\n"), "stdin");
998 } else if (pFlag->status & WRONG_CODEPAGE) {
999 fprintf(stderr,"%s: ",progname);
1000 fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
1001 } else if (pFlag->status & LOCALE_NOT_UTF8) {
1002 fprintf(stderr,"%s: ",progname);
1003 fprintf(stderr, _("Skipping UTF-16 file %s, the current locale character encoding is not UTF-8.\n"), "stdin");
1004 } else if (pFlag->status & WCHAR_T_TOO_SMALL) {
1005 fprintf(stderr,"%s: ",progname);
1006 fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), "stdin", (int)sizeof(wchar_t));
1007 } else if (pFlag->status & UNICODE_CONVERSION_ERROR) {
1008 fprintf(stderr,"%s: ",progname);
1009 fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), "stdin");
1013 void print_messages_newfile(const CFlag *pFlag, const char *infile, const char *outfile, const char *progname, const int RetVal)
1015 if (pFlag->status & NO_REGFILE) {
1016 fprintf(stderr,"%s: ",progname);
1017 fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
1018 } else if (pFlag->status & OUTPUTFILE_SYMLINK) {
1019 fprintf(stderr,"%s: ",progname);
1020 fprintf(stderr, _("Skipping %s, output file %s is a symbolic link.\n"), infile, outfile);
1021 } else if (pFlag->status & INPUT_TARGET_NO_REGFILE) {
1022 fprintf(stderr,"%s: ",progname);
1023 fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
1024 } else if (pFlag->status & OUTPUT_TARGET_NO_REGFILE) {
1025 fprintf(stderr,"%s: ",progname);
1026 fprintf(stderr, _("Skipping %s, target of symbolic link %s is not a regular file.\n"), infile, outfile);
1027 } else if (pFlag->status & BINARY_FILE) {
1028 fprintf(stderr,"%s: ",progname);
1029 fprintf(stderr, _("Skipping binary file %s\n"), infile);
1030 } else if (pFlag->status & WRONG_CODEPAGE) {
1031 fprintf(stderr,"%s: ",progname);
1032 fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
1033 } else if (pFlag->status & LOCALE_NOT_UTF8) {
1034 fprintf(stderr,"%s: ",progname);
1035 fprintf(stderr, _("Skipping UTF-16 file %s, the current locale character encoding is not UTF-8.\n"), infile);
1036 } else if (pFlag->status & WCHAR_T_TOO_SMALL) {
1037 fprintf(stderr,"%s: ",progname);
1038 fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
1039 } else if (pFlag->status & UNICODE_CONVERSION_ERROR) {
1040 fprintf(stderr,"%s: ",progname);
1041 fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), infile);
1043 fprintf(stderr,"%s: ",progname);
1044 if (is_dos2unix(progname))
1045 fprintf(stderr, _("converting file %s to file %s in Unix format...\n"), infile, outfile);
1047 if (pFlag->FromToMode == FROMTO_UNIX2MAC)
1048 fprintf(stderr, _("converting file %s to file %s in Mac format...\n"), infile, outfile);
1050 fprintf(stderr, _("converting file %s to file %s in DOS format...\n"), infile, outfile);
1053 fprintf(stderr,"%s: ",progname);
1054 fprintf(stderr, _("problems converting file %s to file %s\n"), infile, outfile);
1059 void print_messages_oldfile(const CFlag *pFlag, const char *infile, const char *progname, const int RetVal)
1061 if (pFlag->status & NO_REGFILE) {
1062 fprintf(stderr,"%s: ",progname);
1063 fprintf(stderr, _("Skipping %s, not a regular file.\n"), infile);
1064 } else if (pFlag->status & OUTPUTFILE_SYMLINK) {
1065 fprintf(stderr,"%s: ",progname);
1066 fprintf(stderr, _("Skipping symbolic link %s.\n"), infile);
1067 } else if (pFlag->status & INPUT_TARGET_NO_REGFILE) {
1068 fprintf(stderr,"%s: ",progname);
1069 fprintf(stderr, _("Skipping symbolic link %s, target is not a regular file.\n"), infile);
1070 } else if (pFlag->status & BINARY_FILE) {
1071 fprintf(stderr,"%s: ",progname);
1072 fprintf(stderr, _("Skipping binary file %s\n"), infile);
1073 } else if (pFlag->status & WRONG_CODEPAGE) {
1074 fprintf(stderr,"%s: ",progname);
1075 fprintf(stderr, _("code page %d is not supported.\n"), pFlag->ConvMode);
1076 } else if (pFlag->status & LOCALE_NOT_UTF8) {
1077 fprintf(stderr,"%s: ",progname);
1078 fprintf(stderr, _("Skipping UTF-16 file %s, the current locale character encoding is not UTF-8.\n"), infile);
1079 } else if (pFlag->status & WCHAR_T_TOO_SMALL) {
1080 fprintf(stderr,"%s: ",progname);
1081 fprintf(stderr, _("Skipping UTF-16 file %s, the size of wchar_t is %d bytes.\n"), infile, (int)sizeof(wchar_t));
1082 } else if (pFlag->status & UNICODE_CONVERSION_ERROR) {
1083 fprintf(stderr,"%s: ",progname);
1084 fprintf(stderr, _("Skipping UTF-16 file %s, an UTF-16 conversion error occurred.\n"), infile);
1086 fprintf(stderr,"%s: ",progname);
1087 if (is_dos2unix(progname))
1088 fprintf(stderr, _("converting file %s to Unix format...\n"), infile);
1090 if (pFlag->FromToMode == FROMTO_UNIX2MAC)
1091 fprintf(stderr, _("converting file %s to Mac format...\n"), infile);
1093 fprintf(stderr, _("converting file %s to DOS format...\n"), infile);
1096 fprintf(stderr,"%s: ",progname);
1097 fprintf(stderr, _("problems converting file %s\n"), infile);
1102 int parse_options(int argc, char *argv[], CFlag *pFlag, const char *localedir, const char *progname,
1103 void (*PrintLicense)(void),
1104 int (*Convert)(FILE*, FILE*, CFlag *, const char *)
1106 , int (*ConvertW)(FILE*, FILE*, CFlag *, const char *)
1112 int CanSwitchFileMode = 1;
1113 int process_options = 1;
1116 /* variable initialisations */
1119 pFlag->KeepDate = 0;
1120 pFlag->ConvMode = CONVMODE_ASCII; /* default ascii */
1123 pFlag->Follow = SYMLINK_SKIP;
1125 pFlag->stdio_mode = 1;
1127 pFlag->bomtype = FILE_MBS;
1129 pFlag->keep_utf16 = 0;
1131 while ((++ArgIdx < argc) && (!ShouldExit))
1133 /* is it an option? */
1134 if ((argv[ArgIdx][0] == '-') && process_options)
1137 if (strcmp(argv[ArgIdx],"--") == 0)
1138 process_options = 0;
1139 else if ((strcmp(argv[ArgIdx],"-h") == 0) || (strcmp(argv[ArgIdx],"--help") == 0))
1141 PrintUsage(progname);
1142 return(pFlag->error);
1144 else if ((strcmp(argv[ArgIdx],"-b") == 0) || (strcmp(argv[ArgIdx],"--keep-bom") == 0))
1145 pFlag->keep_bom = 1;
1146 else if ((strcmp(argv[ArgIdx],"-k") == 0) || (strcmp(argv[ArgIdx],"--keepdate") == 0))
1147 pFlag->KeepDate = 1;
1148 else if ((strcmp(argv[ArgIdx],"-f") == 0) || (strcmp(argv[ArgIdx],"--force") == 0))
1150 else if ((strcmp(argv[ArgIdx],"-s") == 0) || (strcmp(argv[ArgIdx],"--safe") == 0))
1152 else if ((strcmp(argv[ArgIdx],"-q") == 0) || (strcmp(argv[ArgIdx],"--quiet") == 0))
1154 else if ((strcmp(argv[ArgIdx],"-v") == 0) || (strcmp(argv[ArgIdx],"--verbose") == 0))
1156 else if ((strcmp(argv[ArgIdx],"-l") == 0) || (strcmp(argv[ArgIdx],"--newline") == 0))
1158 else if ((strcmp(argv[ArgIdx],"-m") == 0) || (strcmp(argv[ArgIdx],"--add-bom") == 0))
1160 else if ((strcmp(argv[ArgIdx],"-r") == 0) || (strcmp(argv[ArgIdx],"--remove-bom") == 0)) {
1161 pFlag->keep_bom = 0;
1164 else if ((strcmp(argv[ArgIdx],"-S") == 0) || (strcmp(argv[ArgIdx],"--skip-symlink") == 0))
1165 pFlag->Follow = SYMLINK_SKIP;
1166 else if ((strcmp(argv[ArgIdx],"-F") == 0) || (strcmp(argv[ArgIdx],"--follow-symlink") == 0))
1167 pFlag->Follow = SYMLINK_FOLLOW;
1168 else if ((strcmp(argv[ArgIdx],"-R") == 0) || (strcmp(argv[ArgIdx],"--replace-symlink") == 0))
1169 pFlag->Follow = SYMLINK_REPLACE;
1170 else if ((strcmp(argv[ArgIdx],"-V") == 0) || (strcmp(argv[ArgIdx],"--version") == 0)) {
1171 PrintVersion(progname, localedir);
1172 return(pFlag->error);
1174 else if ((strcmp(argv[ArgIdx],"-L") == 0) || (strcmp(argv[ArgIdx],"--license") == 0)) {
1176 return(pFlag->error);
1178 else if (strcmp(argv[ArgIdx],"-ascii") == 0) { /* SunOS compatible options */
1179 pFlag->ConvMode = CONVMODE_ASCII;
1180 pFlag->keep_utf16 = 0;
1182 else if (strcmp(argv[ArgIdx],"-7") == 0)
1183 pFlag->ConvMode = CONVMODE_7BIT;
1184 else if (strcmp(argv[ArgIdx],"-iso") == 0) {
1185 pFlag->ConvMode = (int)query_con_codepage();
1186 if (pFlag->verbose) {
1187 fprintf(stderr,"%s: ",progname);
1188 fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode);
1190 if (pFlag->ConvMode < 2)
1191 pFlag->ConvMode = CONVMODE_437;
1193 else if (strcmp(argv[ArgIdx],"-437") == 0)
1194 pFlag->ConvMode = CONVMODE_437;
1195 else if (strcmp(argv[ArgIdx],"-850") == 0)
1196 pFlag->ConvMode = CONVMODE_850;
1197 else if (strcmp(argv[ArgIdx],"-860") == 0)
1198 pFlag->ConvMode = CONVMODE_860;
1199 else if (strcmp(argv[ArgIdx],"-863") == 0)
1200 pFlag->ConvMode = CONVMODE_863;
1201 else if (strcmp(argv[ArgIdx],"-865") == 0)
1202 pFlag->ConvMode = CONVMODE_865;
1203 else if (strcmp(argv[ArgIdx],"-1252") == 0)
1204 pFlag->ConvMode = CONVMODE_1252;
1206 else if ((strcmp(argv[ArgIdx],"-u") == 0) || (strcmp(argv[ArgIdx],"--keep-utf16") == 0))
1207 pFlag->keep_utf16 = 1;
1208 else if ((strcmp(argv[ArgIdx],"-ul") == 0) || (strcmp(argv[ArgIdx],"--assume-utf16le") == 0))
1209 pFlag->ConvMode = CONVMODE_UTF16LE;
1210 else if ((strcmp(argv[ArgIdx],"-ub") == 0) || (strcmp(argv[ArgIdx],"--assume-utf16be") == 0))
1211 pFlag->ConvMode = CONVMODE_UTF16BE;
1213 else if ((strcmp(argv[ArgIdx],"-c") == 0) || (strcmp(argv[ArgIdx],"--convmode") == 0)) {
1214 if (++ArgIdx < argc) {
1215 if (strcmpi(argv[ArgIdx],"ascii") == 0) { /* Benjamin Lin's legacy options */
1216 pFlag->ConvMode = CONVMODE_ASCII;
1217 pFlag->keep_utf16 = 0;
1219 else if (strcmpi(argv[ArgIdx], "7bit") == 0)
1220 pFlag->ConvMode = CONVMODE_7BIT;
1221 else if (strcmpi(argv[ArgIdx], "iso") == 0) {
1222 pFlag->ConvMode = (int)query_con_codepage();
1223 if (pFlag->verbose) {
1224 fprintf(stderr,"%s: ",progname);
1225 fprintf(stderr,_("active code page: %d\n"), pFlag->ConvMode);
1227 if (pFlag->ConvMode < 2)
1228 pFlag->ConvMode = CONVMODE_437;
1230 else if (strcmpi(argv[ArgIdx], "mac") == 0) {
1231 if (is_dos2unix(progname))
1232 pFlag->FromToMode = FROMTO_MAC2UNIX;
1234 pFlag->FromToMode = FROMTO_UNIX2MAC;
1236 fprintf(stderr,"%s: ",progname);
1237 fprintf(stderr, _("invalid %s conversion mode specified\n"),argv[ArgIdx]);
1240 pFlag->stdio_mode = 0;
1244 fprintf(stderr,"%s: ",progname);
1245 fprintf(stderr,_("option '%s' requires an argument\n"),argv[ArgIdx]);
1248 pFlag->stdio_mode = 0;
1252 else if ((strcmp(argv[ArgIdx],"-o") == 0) || (strcmp(argv[ArgIdx],"--oldfile") == 0)) {
1253 /* last convert not paired */
1254 if (!CanSwitchFileMode) {
1255 fprintf(stderr,"%s: ",progname);
1256 fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
1259 pFlag->stdio_mode = 0;
1264 else if ((strcmp(argv[ArgIdx],"-n") == 0) || (strcmp(argv[ArgIdx],"--newfile") == 0)) {
1265 /* last convert not paired */
1266 if (!CanSwitchFileMode) {
1267 fprintf(stderr,"%s: ",progname);
1268 fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
1271 pFlag->stdio_mode = 0;
1275 else { /* wrong option */
1276 PrintUsage(progname);
1279 pFlag->stdio_mode = 0;
1283 pFlag->stdio_mode = 0;
1285 if (pFlag->NewFile) {
1286 if (CanSwitchFileMode)
1287 CanSwitchFileMode = 0;
1290 RetVal = ConvertNewFile(argv[ArgIdx-1], argv[ArgIdx], pFlag, progname, Convert, ConvertW);
1292 RetVal = ConvertNewFile(argv[ArgIdx-1], argv[ArgIdx], pFlag, progname, Convert);
1295 print_messages_newfile(pFlag, argv[ArgIdx-1], argv[ArgIdx], progname, RetVal);
1296 CanSwitchFileMode = 1;
1301 RetVal = ConvertNewFile(argv[ArgIdx], argv[ArgIdx], pFlag, progname, Convert, ConvertW);
1303 RetVal = ConvertNewFile(argv[ArgIdx], argv[ArgIdx], pFlag, progname, Convert);
1306 print_messages_oldfile(pFlag, argv[ArgIdx], progname, RetVal);
1311 /* no file argument, use stdin and stdout */
1312 if (pFlag->stdio_mode) {
1314 ConvertStdio(pFlag, progname, Convert, ConvertW);
1316 ConvertStdio(pFlag, progname, Convert);
1319 print_messages_stdio(pFlag, progname);
1320 return pFlag->error;
1323 if (!CanSwitchFileMode) {
1324 fprintf(stderr,"%s: ",progname);
1325 fprintf(stderr, _("target of file %s not specified in new-file mode\n"), argv[ArgIdx-1]);
1328 return pFlag->error;
1332 wint_t d2u_getwc(FILE *f, int bomtype)
1334 int c_trail, c_lead;
1337 if (((c_lead=fgetc(f)) == EOF) || ((c_trail=fgetc(f)) == EOF))
1340 if (bomtype == FILE_UTF16LE) { /* UTF16 little endian */
1342 wc = (wint_t)(c_trail + c_lead) ;
1343 } else { /* UTF16 big endian */
1345 wc = (wint_t)(c_trail + c_lead) ;
1350 wint_t d2u_ungetwc(wint_t wc, FILE *f, int bomtype)
1352 int c_trail, c_lead;
1354 if (bomtype == FILE_UTF16LE) { /* UTF16 little endian */
1355 c_trail = (int)(wc & 0xff00);
1357 c_lead = (int)(wc & 0xff);
1358 } else { /* UTF16 big endian */
1359 c_lead = (int)(wc & 0xff00);
1361 c_trail = (int)(wc & 0xff);
1364 /* push back in reverse order */
1365 if ((ungetc(c_trail,f) == EOF) || (ungetc(c_lead,f) == EOF))
1370 /* Put wide character */
1371 wint_t d2u_putwc(wint_t wc, FILE *f, CFlag *ipFlag)
1374 static wchar_t lead, trail;
1375 static wchar_t wstr[3];
1377 int c_trail, c_lead;
1379 if (ipFlag->keep_utf16) {
1380 if (ipFlag->bomtype == FILE_UTF16LE) { /* UTF16 little endian */
1381 c_trail = (int)(wc & 0xff00);
1383 c_lead = (int)(wc & 0xff);
1384 } else { /* UTF16 big endian */
1385 c_lead = (int)(wc & 0xff00);
1387 c_trail = (int)(wc & 0xff);
1389 if ((fputc(c_lead,f) == EOF) || (fputc(c_trail,f) == EOF))
1394 if ((wc >= 0xd800) && (wc < 0xdc00)) {
1395 /* fprintf(stderr, "UTF-16 lead %x\n",wc); */
1396 lead = (wchar_t)wc; /* lead (high) surrogate */
1399 if ((wc >= 0xdc00) && (wc < 0xe000)) {
1400 /* fprintf(stderr, "UTF-16 trail %x\n",wc); */
1401 trail = (wchar_t)wc; /* trail (low) surrogate */
1402 #if defined(_WIN32) || defined(__CYGWIN__)
1403 /* On Windows (including Cygwin) wchar_t is 16 bit */
1404 /* We cannot decode a UTF-16 surrogate pair, because it will
1405 not fit in a 16 bit wchar_t. */
1410 /* On Unix wchar_t is 32 bit */
1411 /* When we don't decode the UTF-16 surrogate pair, wcstombs() does not
1412 * produce the same UTF-8 as WideCharToMultiByte(). The UTF-8 output
1413 * produced by wcstombs() is bigger, because it just translates the wide
1414 * characters in the range 0xD800..0xDBFF individually to UTF-8 sequences
1415 * (although these code points are reserved for use only as surrogate
1418 * Some smart viewers can still display this UTF-8 correctly (like Total
1419 * Commander lister), however the UTF-8 is not readable by Windows
1420 * Notepad (on Windows 7). When we decode the UTF-16 surrogate pairs
1421 * ourselves the wcstombs() UTF-8 output is identical to what
1422 * WideCharToMultiByte() produces, and is readable by Notepad.
1424 * Surrogate halves in UTF-8 are invalid. See also
1425 * http://en.wikipedia.org/wiki/UTF-8#Invalid_code_points
1426 * http://tools.ietf.org/html/rfc3629#page-5
1427 * It is a bug in (some implementations of) wcstombs().
1428 * On Cygwin 1.7 wcstombs() produces correct UTF-8 from UTF-16 surrogate pairs.
1430 /* Decode UTF-16 surrogate pair */
1432 wstr[0] += (lead & 0x03FF) << 10;
1433 wstr[0] += (trail & 0x03FF);
1435 /* fprintf(stderr, "UTF-32 %x\n",wstr[0]); */
1438 wstr[0] = (wchar_t)wc;
1442 #if defined(_WIN32) || defined(__CYGWIN__)
1443 /* On Windows we convert UTF-16 always to UTF-8 */
1444 len = (size_t)(WideCharToMultiByte(CP_UTF8, 0, wstr, -1, mbs, sizeof(mbs), NULL, NULL) -1);
1446 /* On Unix we convert UTF-16 to the locale encoding */
1447 len = wcstombs(mbs, wstr, sizeof(mbs));
1448 /* fprintf(stderr, "len %d\n",len); */
1451 if ( len == (size_t)(-1) ) {
1452 /* Stop when there is a conversion error */
1453 ipFlag->status |= UNICODE_CONVERSION_ERROR ;
1456 for (i=0; i<len; i++) {
1457 if (fputc(mbs[i], f) == EOF)