4 * Convert lf ('\x0a') characters in a file to cr lf ('\x0d' '\x0a')
7 * The dos2unix package is distributed under FreeBSD style license.
8 * See also http://www.freebsd.org/copyright/freebsd-license.html
11 * Copyright (C) 2009-2016 Erwin Waterlander
12 * Copyright (C) 1994-1995 Benjamin Lin.
13 * All rights reserved.
15 * Redistribution and use in source and binary forms, with or without
16 * modification, are permitted provided that the following conditions
18 * 1. Redistributions of source code must retain the above copyright
19 * notice, this list of conditions and the following disclaimer.
20 * 2. Redistributions in binary form must reproduce the above copyright
21 * notice in the documentation and/or other materials provided with
24 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY
25 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
27 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
30 * OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
31 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
32 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
33 * OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
34 * IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 * == 1.0 == 1989.10.04 == John Birchfield (jb@koko.csustan.edu)
37 * == 1.1 == 1994.12.20 == Benjamin Lin (blin@socs.uts.edu.au)
38 * Cleaned up for Borland C/C++ 4.02
39 * == 1.2 == 1995.03.09 == Benjamin Lin (blin@socs.uts.edu.au)
40 * Fixed minor typo error
41 * == 1.3 == 1995.03.16 == Benjamin Lin (blin@socs.uts.edu.au)
42 * Modified to more conform to UNIX style.
43 * == 2.0 == 1995.03.19 == Benjamin Lin (blin@socs.uts.edu.au)
44 * Rewritten from scratch.
45 * == 2.2 == 1995.03.30 == Benjamin Lin (blin@socs.uts.edu.au)
46 * Conversion from SunOS charset implemented.
48 * See ChangeLog.txt for complete version history.
58 # if (defined(_WIN32) && !defined(__CYGWIN__))
62 #if !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__) /* Unix, Cygwin */
63 # include <langinfo.h>
/* Print the copyright notice of this program to stdout.
 * The text is wrapped in _() (presumably the gettext translation macro -- the
 * file calls bindtextdomain and textdomain in main) and the closing year of
 * the first copyright range is passed as the %d argument (2016).
 * NOTE(review): the braces and any statement following this call are not
 * visible in this listing; confirm against the complete file. */
67 void PrintLicense(void)
69 D2U_ANSI_FPRINTF(stdout,_("\
70 Copyright (C) 2009-%d Erwin Waterlander\n\
71 Copyright (C) 1994-1995 Benjamin Lin\n\
72 All rights reserved.\n\n"),2016);
/* Optionally emit an extra CR-LF pair after a Unix line ending
 * (wide-character variant, written with d2u_putwc).
 *
 * ipOutF   - output stream.
 * ipFlag   - conversion flags; nothing is written unless ipFlag->NewLine is set.
 * CurChar  - character just processed by the caller.
 * PrevChar - character processed before CurChar.
 * progname - program name forwarded to d2u_putwc and d2u_putwc_error.
 *
 * The pair is written only when CurChar is LF (0x0a) and PrevChar is not CR
 * (0x0d). When a write returns WEOF, d2u_putwc_error is called.
 * NOTE(review): the return statements are not visible in this listing; the
 * caller in ConvertUnixToDosW compares the result against WEOF. */
77 wint_t AddDOSNewLineW(FILE* ipOutF, CFlag *ipFlag, wint_t CurChar, wint_t PrevChar, const char *progname)
79 if (ipFlag->NewLine) { /* add additional CR-LF? */
80 /* Don't add line ending if it is a DOS line ending. Only in case of Unix line ending. */
81 if ((CurChar == 0x0a) && (PrevChar != 0x0d)) {
/* First half of the extra line ending: CR. */
82 if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) {
83 d2u_putwc_error(ipFlag,progname);
/* Second half of the extra line ending: LF. */
86 if (d2u_putwc(0x0a, ipOutF, ipFlag, progname) == WEOF) {
87 d2u_putwc_error(ipFlag,progname);
/* Optionally emit an extra CR-LF pair after a Unix line ending
 * (byte variant, written with fputc).
 *
 * ipOutF   - output stream.
 * ipFlag   - conversion flags; nothing is written unless ipFlag->NewLine is set.
 * CurChar  - character just processed by the caller.
 * PrevChar - character processed before CurChar.
 * progname - program name forwarded to d2u_putc_error.
 *
 * The pair is written only when CurChar is LF (0x0a) and PrevChar is not CR
 * (0x0d). When a write returns EOF, d2u_putc_error is called.
 * NOTE(review): the return statements are not visible in this listing; the
 * caller in ConvertUnixToDos compares the result against EOF. */
96 int AddDOSNewLine(FILE* ipOutF, CFlag *ipFlag, int CurChar, int PrevChar, const char *progname)
98 if (ipFlag->NewLine) { /* add additional CR-LF? */
99 /* Don't add line ending if it is a DOS line ending. Only in case of Unix line ending. */
100 if ((CurChar == '\x0a') && (PrevChar != '\x0d')) {
/* First half of the extra line ending: CR. */
101 if (fputc('\x0d', ipOutF) == EOF) {
102 d2u_putc_error(ipFlag,progname);
/* Second half of the extra line ending: LF. */
105 if (fputc('\x0a', ipOutF) == EOF) {
106 d2u_putc_error(ipFlag,progname);
/* Wide-character line-ending conversion: reads with d2u_getwc and writes with
 * d2u_putwc; the behaviour is selected by ipFlag->FromToMode
 * (FROMTO_UNIX2DOS or FROMTO_UNIX2MAC, anything else is reported as a
 * program error).
 *
 * ipInF    - input stream.
 * ipOutF   - output stream.
 * ipFlag   - conversion flags. Read here: FromToMode, Force, verbose,
 *            stdio_mode, bomtype, NewLine, status, error. Written here:
 *            status, error, line_nr.
 * progname - program name used as prefix in diagnostics.
 *
 * Local counters: line_nr starts at 1 and converted at 0; the final verbose
 * message reports converted and line_nr - 1.
 * NOTE(review): the original line numbers embedded in this listing are not
 * contiguous, so braces, else branches, break/return statements and the
 * counter updates are not visible. The comments below describe only the
 * statements that can be seen; confirm the nesting against the full file. */
114 /* converts stream ipInF to DOS format text and write to stream ipOutF
115 * RetVal: 0 if success
119 int ConvertUnixToDosW(FILE* ipInF, FILE* ipOutF, CFlag *ipFlag, const char *progname)
123 wint_t PreviousChar = 0;
124 unsigned int line_nr = 1;
125 unsigned int converted = 0;
130 /* CR-LF -> CR-LF, in case the input file is a DOS text file */
131 /* \x0a = Newline/Line Feed (LF) */
132 /* \x0d = Carriage Return (CR) */
134 switch (ipFlag->FromToMode)
136 case FROMTO_UNIX2DOS: /* unix2dos */
/* Main read loop: one wide character per iteration until WEOF. */
137 while ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) != WEOF) { /* get character */
/* Binary detection, skipped when Force is set. NOTE(review): one condition
 * of this test is not visible in this listing. */
138 if ((ipFlag->Force == 0) &&
140 (TempChar != 0x0a) && /* Not an LF */
141 (TempChar != 0x0d) && /* Not a CR */
142 (TempChar != 0x09) && /* Not a TAB */
143 (TempChar != 0x0c)) { /* Not a form feed */
145 ipFlag->status |= BINARY_FILE ;
146 if (ipFlag->verbose) {
/* In stdio mode the error flag is raised if it is not set yet. */
147 if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
148 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
149 D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x00%02X found at line %u\n"), TempChar, line_nr);
/* An LF read at this point gets a CR written in front of it. */
153 if (TempChar == 0x0a) {
154 if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) { /* got LF, put extra CR */
156 d2u_putwc_error(ipFlag,progname);
/* A CR triggers a one-character look-ahead so that CR-LF input can be
 * handled as a pair. */
161 if (TempChar == 0x0d) { /* got CR */
162 if ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) == WEOF) { /* get next char (possibly LF) */
163 if (ferror(ipInF)) /* Read error */
165 TempChar = 0x0d; /* end of file. */
167 if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) { /* put CR */
169 d2u_putwc_error(ipFlag,progname);
176 if (TempChar == 0x0a) /* Count all DOS and Unix line breaks */
/* Write the current character itself. */
178 if (d2u_putwc(TempChar, ipOutF, ipFlag, progname) == WEOF)
181 d2u_putwc_error(ipFlag,progname);
/* Optional extra line ending, controlled by ipFlag->NewLine inside
 * AddDOSNewLineW. */
184 if (AddDOSNewLineW( ipOutF, ipFlag, TempChar, PreviousChar, progname) == WEOF) {
189 PreviousChar = TempChar;
/* Tell a read error apart from a normal end of file. */
191 if ((TempChar == WEOF) && ferror(ipInF)) {
193 d2u_getc_error(ipFlag,progname);
196 case FROMTO_UNIX2MAC: /* unix2mac */
197 while ((TempChar = d2u_getwc(ipInF, ipFlag->bomtype)) != WEOF) {
/* Same binary detection as in the unix2dos branch. NOTE(review): one
 * condition of this test is not visible in this listing. */
198 if ((ipFlag->Force == 0) &&
200 (TempChar != 0x0a) && /* Not an LF */
201 (TempChar != 0x0d) && /* Not a CR */
202 (TempChar != 0x09) && /* Not a TAB */
203 (TempChar != 0x0c)) { /* Not a form feed */
205 ipFlag->status |= BINARY_FILE ;
206 if (ipFlag->verbose) {
207 if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
208 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
209 D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x00%02X found at line %u\n"), TempChar, line_nr);
/* Characters other than LF are written through unchanged. */
213 if (TempChar != 0x0a) { /* Not an LF */
214 if(d2u_putwc(TempChar, ipOutF, ipFlag, progname) == WEOF) {
216 d2u_putwc_error(ipFlag,progname);
219 PreviousChar = TempChar;
220 if (TempChar == 0x0d) /* CR */
223 /* TempChar is an LF */
224 if (PreviousChar != 0x0d) /* CR already counted */
226 /* Don't touch this delimiter if it's a CR,LF pair. */
227 if ( PreviousChar == 0x0d ) {
228 if (d2u_putwc(0x0a, ipOutF, ipFlag, progname) == WEOF) { /* CR,LF pair. Put LF */
230 d2u_putwc_error(ipFlag,progname);
233 PreviousChar = TempChar;
236 PreviousChar = TempChar;
237 if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) { /* Unix line end (LF). Put CR */
239 d2u_putwc_error(ipFlag,progname);
243 if (ipFlag->NewLine) { /* add additional CR? */
244 if (d2u_putwc(0x0d, ipOutF, ipFlag, progname) == WEOF) {
246 d2u_putwc_error(ipFlag,progname);
/* Tell a read error apart from a normal end of file. */
252 if ((TempChar == WEOF) && ferror(ipInF)) {
254 d2u_getc_error(ipFlag,progname);
257 default: /* unknown FromToMode */
260 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
261 D2U_UTF8_FPRINTF(stderr, _("program error, invalid conversion mode %d\n"),ipFlag->FromToMode);
/* Store the current line number in ipFlag when a Unicode conversion error
 * is flagged in status. */
265 if (ipFlag->status & UNICODE_CONVERSION_ERROR)
266 ipFlag->line_nr = line_nr;
/* Summary line, printed only when RetVal is 0 and verbose is above 1. */
267 if ((RetVal == 0) && (ipFlag->verbose > 1)) {
268 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
269 D2U_UTF8_FPRINTF(stderr, _("Converted %u out of %u line breaks.\n"), converted, line_nr -1);
/* Byte-oriented line-ending conversion: reads with fgetc and writes with
 * fputc. A translation table (ConvTable) is chosen from ipFlag->ConvMode and
 * the line-ending behaviour from ipFlag->FromToMode (FROMTO_UNIX2DOS or
 * FROMTO_UNIX2MAC, anything else is reported as a program error).
 *
 * ipInF    - input stream.
 * ipOutF   - output stream.
 * ipFlag   - conversion flags. Read here: ConvMode, bomtype, FromToMode,
 *            Force, verbose, stdio_mode, NewLine, error. Written here:
 *            status, error.
 * progname - program name used as prefix in diagnostics.
 *
 * Local counters: line_nr starts at 1 and converted at 0; the final verbose
 * message reports converted and line_nr - 1.
 * NOTE(review): the original line numbers embedded in this listing are not
 * contiguous, so braces, else branches, break/return statements and the
 * counter updates are not visible. The comments below describe only the
 * statements that can be seen; confirm the nesting against the full file. */
275 /* converts stream ipInF to DOS format text and write to stream ipOutF
276 * RetVal: 0 if success
279 int ConvertUnixToDos(FILE* ipInF, FILE* ipOutF, CFlag *ipFlag, const char *progname)
283 int PreviousChar = 0;
285 unsigned int line_nr = 1;
286 unsigned int converted = 0;
290 switch (ipFlag->ConvMode) {
291 case CONVMODE_ASCII: /* ascii */
292 case CONVMODE_UTF16LE: /* Assume UTF-16LE, bomtype = FILE_UTF8 or GB18030 */
293 case CONVMODE_UTF16BE: /* Assume UTF-16BE, bomtype = FILE_UTF8 or GB18030 */
294 ConvTable = U2DAsciiTable;
296 case CONVMODE_7BIT: /* 7bit */
297 ConvTable = U2D7BitTable;
299 case CONVMODE_437: /* iso */
300 ConvTable = U2DIso437Table;
302 case CONVMODE_850: /* iso */
303 ConvTable = U2DIso850Table;
305 case CONVMODE_860: /* iso */
306 ConvTable = U2DIso860Table;
308 case CONVMODE_863: /* iso */
309 ConvTable = U2DIso863Table;
311 case CONVMODE_865: /* iso */
312 ConvTable = U2DIso865Table;
314 case CONVMODE_1252: /* iso */
315 ConvTable = U2DIso1252Table;
317 default: /* unknown convmode */
318 ipFlag->status |= WRONG_CODEPAGE ;
321 /* Turn off ISO and 7-bit conversion for Unicode text files */
322 if (ipFlag->bomtype > 0)
323 ConvTable = U2DAsciiTable;
/* Report the code page number when a mode beyond 7bit is selected and
 * verbose output is on. */
325 if ((ipFlag->ConvMode > CONVMODE_7BIT) && (ipFlag->verbose)) { /* not ascii or 7bit */
326 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
327 D2U_UTF8_FPRINTF(stderr, _("using code page %d.\n"), ipFlag->ConvMode);
331 /* CR-LF -> CR-LF, in case the input file is a DOS text file */
332 /* \x0a = Newline/Line Feed (LF) */
333 /* \x0d = Carriage Return (CR) */
335 switch (ipFlag->FromToMode) {
336 case FROMTO_UNIX2DOS: /* unix2dos */
/* Main read loop: one byte per iteration until EOF. */
337 while ((TempChar = fgetc(ipInF)) != EOF) { /* get character */
/* Binary detection, skipped when Force is set. NOTE(review): one condition
 * of this test is not visible in this listing. */
338 if ((ipFlag->Force == 0) &&
340 (TempChar != '\x0a') && /* Not an LF */
341 (TempChar != '\x0d') && /* Not a CR */
342 (TempChar != '\x09') && /* Not a TAB */
343 (TempChar != '\x0c')) { /* Not a form feed */
345 ipFlag->status |= BINARY_FILE ;
346 if (ipFlag->verbose) {
/* In stdio mode the error flag is raised if it is not set yet. */
347 if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
348 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
349 D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x%02X found at line %u\n"), TempChar, line_nr);
/* An LF read at this point gets a CR written in front of it. */
353 if (TempChar == '\x0a')
355 if (fputc('\x0d', ipOutF) == EOF) { /* got LF, put extra CR */
357 d2u_putc_error(ipFlag,progname);
/* A CR triggers a one-byte look-ahead so that CR-LF input can be handled
 * as a pair. */
362 if (TempChar == '\x0d') { /* got CR */
363 if ((TempChar = fgetc(ipInF)) == EOF) { /* get next char (possibly LF) */
364 if (ferror(ipInF)) /* Read error */
366 TempChar = '\x0d'; /* end of file. */
368 if (fputc('\x0d', ipOutF) == EOF) { /* put CR */
370 d2u_putc_error(ipFlag,progname);
373 PreviousChar = '\x0d';
377 if (TempChar == '\x0a') /* Count all DOS and Unix line breaks */
/* The byte is written through ConvTable, indexed by the value read. */
379 if (fputc(ConvTable[TempChar], ipOutF) == EOF) { /* put LF or other char */
381 d2u_putc_error(ipFlag,progname);
/* Optional extra line ending, controlled by ipFlag->NewLine inside
 * AddDOSNewLine. */
384 if (AddDOSNewLine( ipOutF, ipFlag, TempChar, PreviousChar, progname) == EOF) {
389 PreviousChar = TempChar;
/* Tell a read error apart from a normal end of file. */
391 if ((TempChar == EOF) && ferror(ipInF)) {
393 d2u_getc_error(ipFlag,progname);
396 case FROMTO_UNIX2MAC: /* unix2mac */
397 while ((TempChar = fgetc(ipInF)) != EOF) {
/* Same binary detection as in the unix2dos branch. NOTE(review): one
 * condition of this test is not visible in this listing. */
398 if ((ipFlag->Force == 0) &&
400 (TempChar != '\x0a') && /* Not an LF */
401 (TempChar != '\x0d') && /* Not a CR */
402 (TempChar != '\x09') && /* Not a TAB */
403 (TempChar != '\x0c')) { /* Not a form feed */
405 ipFlag->status |= BINARY_FILE ;
406 if (ipFlag->verbose) {
407 if ((ipFlag->stdio_mode) && (!ipFlag->error)) ipFlag->error = 1;
408 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
409 D2U_UTF8_FPRINTF(stderr, _("Binary symbol 0x%02X found at line %u\n"), TempChar, line_nr);
/* Bytes other than LF are written through ConvTable. */
413 if (TempChar != '\x0a') { /* Not an LF */
414 if(fputc(ConvTable[TempChar], ipOutF) == EOF) {
416 d2u_putc_error(ipFlag,progname);
419 PreviousChar = TempChar;
420 if (TempChar == '\x0d') /* CR */
423 /* TempChar is an LF */
424 if (PreviousChar != '\x0d') /* CR already counted */
426 /* Don't touch this delimiter if it's a CR,LF pair. */
427 if ( PreviousChar == '\x0d' ) {
428 if (fputc('\x0a', ipOutF) == EOF) { /* CR,LF pair. Put LF */
430 d2u_putc_error(ipFlag,progname);
433 PreviousChar = TempChar;
436 PreviousChar = TempChar;
437 if (fputc('\x0d', ipOutF) == EOF) { /* Unix line end (LF). Put CR */
439 d2u_putc_error(ipFlag,progname);
443 if (ipFlag->NewLine) { /* add additional CR? */
444 if (fputc('\x0d', ipOutF) == EOF) {
446 d2u_putc_error(ipFlag,progname);
/* Tell a read error apart from a normal end of file. */
452 if ((TempChar == EOF) && ferror(ipInF)) {
454 d2u_getc_error(ipFlag,progname);
457 default: /* unknown FromToMode */
460 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
461 D2U_UTF8_FPRINTF(stderr, _("program error, invalid conversion mode %d\n"),ipFlag->FromToMode);
/* Summary line, printed only when RetVal is 0 and verbose is above 1. */
465 if ((RetVal == 0) && (ipFlag->verbose > 1)) {
466 D2U_UTF8_FPRINTF(stderr, "%s: ", progname);
467 D2U_UTF8_FPRINTF(stderr, _("Converted %u out of %u line breaks.\n"), converted, line_nr -1);
/* Program entry point for unix2dos / unix2mac.
 *
 * Visible steps, in order:
 *   1. Set the default program name and resolve the locale directory from
 *      the DOS2UNIX_LOCALEDIR environment variable or the LOCALEDIR default.
 *   2. Call setlocale, bindtextdomain and textdomain.
 *   3. Allocate the CFlag structure and default FromToMode to FROMTO_UNIX2DOS.
 *   4. Inspect argv[0]; when the program was started as unix2mac, switch to
 *      FROMTO_UNIX2MAC.
 *   5. On the Windows wide-argument path, fetch the command line with
 *      CommandLineToArgvW and expand it with glob_warg.
 *   6. Hand everything to parse_options and return its result.
 *
 * NOTE(review): most preprocessor conditionals, the variable declarations and
 * the error-path statements are not visible in this listing; which of the
 * alternative statements below is compiled depends on conditionals that
 * cannot be seen here. */
473 int main (int argc, char *argv[])
475 /* variable declarations */
479 char localedir[1024];
481 int _dowildcard = -1; /* enable wildcard expansion for Win64 */
/* Default program name; replaced below when invoked as unix2mac. */
491 strcpy(progname,"unix2dos");
/* Locale directory: the environment value is used only when its length is
 * below sizeof(localedir); otherwise an error is printed and LOCALEDIR is
 * used. NOTE(review): the test that guards the getenv result is not visible
 * here. */
494 ptr = getenv("DOS2UNIX_LOCALEDIR");
496 d2u_strncpy(localedir,LOCALEDIR,sizeof(localedir));
498 if (strlen(ptr) < sizeof(localedir))
499 d2u_strncpy(localedir,ptr,sizeof(localedir));
501 D2U_UTF8_FPRINTF(stderr,"%s: ",progname);
502 D2U_ANSI_FPRINTF(stderr, "%s", _("error: Value of environment variable DOS2UNIX_LOCALEDIR is too long.\n"));
503 d2u_strncpy(localedir,LOCALEDIR,sizeof(localedir));
508 #if defined(ENABLE_NLS) || (defined(D2U_UNICODE) && !defined(__MSDOS__) && !defined(_WIN32) && !defined(__OS2__))
509 /* setlocale() is also needed for nl_langinfo() */
510 #if (defined(_WIN32) && !defined(__CYGWIN__))
511 /* When the locale is set to "" on Windows all East-Asian multi-byte ANSI encoded text is printed
512 wrongly when you use standard printf(). Also UTF-8 code is printed wrongly. See also test/setlocale.c.
513 When we set the locale to "C" gettext still translates the messages on Windows. On Unix this would disable
515 setlocale (LC_ALL, "C");
517 setlocale (LC_ALL, "");
522 bindtextdomain (PACKAGE, localedir);
523 textdomain (PACKAGE);
527 /* variable initialisations */
/* Allocate the flag structure. The two statements after the allocation
 * print strerror(errno); NOTE(review): the test that guards them is not
 * visible here. */
528 pFlag = (CFlag*)malloc(sizeof(CFlag));
530 D2U_UTF8_FPRINTF(stderr, "unix2dos:");
531 D2U_ANSI_FPRINTF(stderr, " %s\n", strerror(errno));
534 pFlag->FromToMode = FROMTO_UNIX2DOS; /* default unix2dos */
/* Look for the last slash, then the last backslash, in argv[0] to isolate
 * the program name. */
537 if ( ((ptr=strrchr(argv[0],'/')) == NULL) && ((ptr=strrchr(argv[0],'\\')) == NULL) )
/* Started as unix2mac or unix2mac.exe (strcmpi is presumably a
 * case-insensitive compare -- TODO confirm): switch to Mac mode. */
542 if ((strcmpi("unix2mac", ptr) == 0) || (strcmpi("unix2mac.exe", ptr) == 0)) {
543 pFlag->FromToMode = FROMTO_UNIX2MAC;
544 strcpy(progname,"unix2mac");
548 /* Get arguments in wide Unicode format in the Windows Command Prompt */
550 /* This does not support wildcard expansion (globbing) */
551 wargv = CommandLineToArgvW(GetCommandLineW(), &argc);
/* NOTE(review): the allocation below uses sizeof(char***) although the
 * object pointed to is read back as a char** (see the dereference after
 * glob_warg). All object pointers normally share one size, so this works in
 * practice, but sizeof(*argv_glob) would state the intent exactly. */
553 argv_glob = (char ***)malloc(sizeof(char***));
554 if (argv_glob == NULL) {
555 D2U_UTF8_FPRINTF(stderr, "%s:", progname);
556 D2U_ANSI_FPRINTF(stderr, " %s\n", strerror(errno));
560 /* Glob the arguments and convert them to UTF-8 */
561 argc_new = glob_warg(argc, wargv, argv_glob, pFlag, progname);
562 argv_new = *argv_glob;
/* Hand over to parse_options. The first call also passes the
 * wide-character converter, the second only the byte converter.
 * NOTE(review): the conditional that selects between the two calls is not
 * visible in this listing. */
569 return parse_options(argc_new, argv_new, pFlag, localedir, progname, PrintLicense, ConvertUnixToDos, ConvertUnixToDosW);
571 return parse_options(argc_new, argv_new, pFlag, localedir, progname, PrintLicense, ConvertUnixToDos);