man/man1/dos2unix.1

   1 .\" Automatically generated by Pod::Man 2.27 (Pod::Simple 3.28)
   2 .\"
   3 .\" Standard preamble:
   4 .\" ========================================================================
   5 .de Sp \" Vertical space (when we can't use .PP)
   6 .if t .sp .5v
   7 .if n .sp
   8 ..
   9 .de Vb \" Begin verbatim text
  10 .ft CW
  11 .nf
  12 .ne \\$1
  13 ..
  14 .de Ve \" End verbatim text
  15 .ft R
  16 .fi
  17 ..
  18 .\" Set up some character translations and predefined strings.  \*(-- will
  19 .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left
  20 .\" double quote, and \*(R" will give a right double quote.  \*(C+ will
  21 .\" give a nicer C++.  Capital omega is used to do unbreakable dashes and
  22 .\" therefore won't be available.  \*(C` and \*(C' expand to `' in nroff,
  23 .\" nothing in troff, for use with C<>.
  24 .tr \(*W-
  25 .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p'
  26 .ie n \{\
  27 .    ds -- \(*W-
  28 .    ds PI pi
  29 .    if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch
  30 .    if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\"  diablo 12 pitch
  31 .    ds L" ""
  32 .    ds R" ""
  33 .    ds C` ""
  34 .    ds C' ""
  35 'br\}
  36 .el\{\
  37 .    ds -- \|\(em\|
  38 .    ds PI \(*p
  39 .    ds L" ``
  40 .    ds R" ''
  41 .    ds C`
  42 .    ds C'
  43 'br\}
  44 .\"
  45 .\" Escape single quotes in literal strings from groff's Unicode transform.
  46 .ie \n(.g .ds Aq \(aq
  47 .el       .ds Aq '
  48 .\"
  49 .\" If the F register is turned on, we'll generate index entries on stderr for
  50 .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index
  51 .\" entries marked with X<> in POD.  Of course, you'll have to process the
  52 .\" output yourself in some meaningful fashion.
  53 .\"
  54 .\" Avoid warning from groff about undefined register 'F'.
  55 .de IX
  56 ..
  57 .nr rF 0
  58 .if \n(.g .if rF .nr rF 1
  59 .if (\n(rF:(\n(.g==0)) \{
  60 .    if \nF \{
  61 .        de IX
  62 .        tm Index:\\$1\t\\n%\t"\\$2"
  63 ..
  64 .        if !\nF==2 \{
  65 .            nr % 0
  66 .            nr F 2
  67 .        \}
  68 .    \}
  69 .\}
  70 .rr rF
  71 .\" ========================================================================
  72 .\"
  73 .IX Title "dos2unix 1"
  74 .TH dos2unix 1 "2014-09-17" "dos2unix" "2014-10-06"
  75 .\" For nroff, turn off justification.  Always turn off hyphenation; it makes
  76 .\" way too many mistakes in technical documents.
  77 .if n .ad l
  78 .nh
  79 .SH "NAME"
  80 dos2unix \- DOS/Mac to Unix and vice versa text file format converter
  81 .SH "SYNOPSIS"
  82 .IX Header "SYNOPSIS"
  83 .Vb 2
  84 \&    dos2unix [options] [FILE ...] [\-n INFILE OUTFILE ...]
  85 \&    unix2dos [options] [FILE ...] [\-n INFILE OUTFILE ...]
  86 .Ve
  87 .SH "DESCRIPTION"
  88 .IX Header "DESCRIPTION"
  89 The Dos2unix package includes utilities \f(CW\*(C`dos2unix\*(C'\fR and \f(CW\*(C`unix2dos\*(C'\fR to convert
  90 plain text files in \s-1DOS\s0 or Mac format to Unix format and vice versa.
  91 .PP
  92 In DOS/Windows text files a line break, also known as newline, is a combination
  93 of two characters: a Carriage Return (\s-1CR\s0) followed by a Line Feed (\s-1LF\s0). In Unix
  94 text files a line break is a single character: the Line Feed (\s-1LF\s0). In Mac text
  95 files, prior to Mac \s-1OS X,\s0 a line break was a single Carriage Return (\s-1CR\s0)
  96 character. Nowadays Mac \s-1OS\s0 uses Unix style (\s-1LF\s0) line breaks.
  97 .PP
  98 Besides line breaks Dos2unix can also convert the encoding of files. A few
  99 \&\s-1DOS\s0 code pages can be converted to Unix Latin\-1. And Windows Unicode (\s-1UTF\-16\s0)
 100 files can be converted to Unix Unicode (\s-1UTF\-8\s0) files.
 101 .PP
 102 Binary files are automatically skipped, unless conversion is forced.
 103 .PP
 104 Non-regular files, such as directories and FIFOs, are automatically skipped.
 105 .PP
 106 Symbolic links and their targets are by default kept untouched.  Symbolic links
 107 can optionally be replaced, or the output can be written to the symbolic link
 108 target.  Writing to a symbolic link target is not supported on Windows.
 109 .PP
 110 Dos2unix was modelled after dos2unix under SunOS/Solaris.  There is one
 111 important difference with the original SunOS/Solaris version. This version does
 112 in-place conversion (old file mode) by default, while the original
 113 SunOS/Solaris version only supports paired conversion (new file mode). See also
 114 options \f(CW\*(C`\-o\*(C'\fR and \f(CW\*(C`\-n\*(C'\fR.
 115 .SH "OPTIONS"
 116 .IX Header "OPTIONS"
 117 .IP "\fB\-\-\fR" 4
 118 .IX Item "--"
 119 Treat all following options as file names. Use this option if you want to
 120 convert files whose names start with a dash. For instance to convert
 121 a file named \*(L"\-foo\*(R", you can use this command:
 122 .Sp
 123 .Vb 1
 124 \&    dos2unix \-\- \-foo
 125 .Ve
 126 .Sp
 127 Or in new file mode:
 128 .Sp
 129 .Vb 1
 130 \&    dos2unix \-n \-\- \-foo out.txt
 131 .Ve
 132 .IP "\fB\-ascii\fR" 4
 133 .IX Item "-ascii"
 134 Convert only line breaks. This is the default conversion mode.
 135 .IP "\fB\-iso\fR" 4
 136 .IX Item "-iso"
 137 Conversion between \s-1DOS\s0 and \s-1ISO\-8859\-1\s0 character set. See also section
 138 \&\s-1CONVERSION MODES.\s0
 139 .IP "\fB\-1252\fR" 4
 140 .IX Item "-1252"
 141 Use Windows code page 1252 (Western European).
 142 .IP "\fB\-437\fR" 4
 143 .IX Item "-437"
 144 Use \s-1DOS\s0 code page 437 (\s-1US\s0). This is the default code page used for \s-1ISO\s0 conversion.
 145 .IP "\fB\-850\fR" 4
 146 .IX Item "-850"
 147 Use \s-1DOS\s0 code page 850 (Western European).
 148 .IP "\fB\-860\fR" 4
 149 .IX Item "-860"
 150 Use \s-1DOS\s0 code page 860 (Portuguese).
 151 .IP "\fB\-863\fR" 4
 152 .IX Item "-863"
 153 Use \s-1DOS\s0 code page 863 (French Canadian).
 154 .IP "\fB\-865\fR" 4
 155 .IX Item "-865"
 156 Use \s-1DOS\s0 code page 865 (Nordic).
 157 .IP "\fB\-7\fR" 4
 158 .IX Item "-7"
 159 Convert 8 bit characters to 7 bit space.
 160 .IP "\fB\-b, \-\-keep\-bom\fR" 4
 161 .IX Item "-b, --keep-bom"
 162 Keep Byte Order Mark (\s-1BOM\s0). When the input file has a \s-1BOM,\s0 write a \s-1BOM\s0 in
 163 the output file. This is the default behavior when converting to \s-1DOS\s0 line
 164 breaks. See also option \f(CW\*(C`\-r\*(C'\fR.
 165 .IP "\fB\-c, \-\-convmode \s-1CONVMODE\s0\fR" 4
 166 .IX Item "-c, --convmode CONVMODE"
 167 Set conversion mode. Where \s-1CONVMODE\s0 is one of:
 168 \&\fIascii\fR, \fI7bit\fR, \fIiso\fR, \fImac\fR
 169 with ascii being the default.
 170 .IP "\fB\-f, \-\-force\fR" 4
 171 .IX Item "-f, --force"
 172 Force conversion of binary files.
 173 .IP "\fB\-h, \-\-help\fR" 4
 174 .IX Item "-h, --help"
 175 Display help and exit.
 176 .IP "\fB\-i[\s-1FLAGS\s0], \-\-info[=FLAGS] \s-1FILE ...\s0\fR" 4
 177 .IX Item "-i[FLAGS], --info[=FLAGS] FILE ..."
 178 Display file information. No conversion is done.
 179 .Sp
 180 The following information is printed, in this order: number of \s-1DOS\s0 line breaks,
 181 number of Unix line breaks, number of Mac line breaks, byte order mark, text
 182 or binary, file name.
 183 .Sp
 184 Example output:
 185 .Sp
 186 .Vb 8
 187 \&     6       0       0  no_bom    text    dos.txt
 188 \&     0       6       0  no_bom    text    unix.txt
 189 \&     0       0       6  no_bom    text    mac.txt
 190 \&     6       6       6  no_bom    text    mixed.txt
 191 \&    50       0       0  UTF\-16LE  text    utf16le.txt
 192 \&     0      50       0  no_bom    text    utf8unix.txt
 193 \&    50       0       0  UTF\-8     text    utf8dos.txt
 194 \&     2     418     219  no_bom    binary  dos2unix.exe
 195 .Ve
 196 .Sp
 197 Optionally extra flags can be set to change the output. One or more flags
 198 can be added.
 199 .RS 4
 200 .IP "\fBd\fR" 4
 201 .IX Item "d"
 202 Print number of \s-1DOS\s0 line breaks.
 203 .IP "\fBu\fR" 4
 204 .IX Item "u"
 205 Print number of Unix line breaks.
 206 .IP "\fBm\fR" 4
 207 .IX Item "m"
 208 Print number of Mac line breaks.
 209 .IP "\fBb\fR" 4
 210 .IX Item "b"
 211 Print the byte order mark.
 212 .IP "\fBt\fR" 4
 213 .IX Item "t"
 214 Print if file is text or binary.
 215 .IP "\fBc\fR" 4
 216 .IX Item "c"
 217 Print only the files that would be converted.
 218 .Sp
 219 With the \f(CW\*(C`c\*(C'\fR flag dos2unix will print only the files that contain \s-1DOS\s0 line breaks,
 220 unix2dos will print only file names that have Unix line breaks.
 221 .RE
 222 .RS 4
 223 .Sp
 224 Examples:
 225 .Sp
 226 Show information for all *.txt files:
 227 .Sp
 228 .Vb 1
 229 \&    dos2unix \-i *.txt
 230 .Ve
 231 .Sp
 232 Show only the number of \s-1DOS\s0 line breaks and Unix line breaks:
 233 .Sp
 234 .Vb 1
 235 \&    dos2unix \-idu *.txt
 236 .Ve
 237 .Sp
 238 Show only the byte order mark:
 239 .Sp
 240 .Vb 1
 241 \&    dos2unix \-\-info=b *.txt
 242 .Ve
 243 .Sp
 244 List the files that have \s-1DOS\s0 line breaks:
 245 .Sp
 246 .Vb 1
 247 \&    dos2unix \-ic *.txt
 248 .Ve
 249 .Sp
 250 List the files that have Unix line breaks:
 251 .Sp
 252 .Vb 1
 253 \&    unix2dos \-ic *.txt
 254 .Ve
 255 .RE
 256 .IP "\fB\-k, \-\-keepdate\fR" 4
 257 .IX Item "-k, --keepdate"
 258 Keep the date stamp of output file same as input file.
 259 .IP "\fB\-L, \-\-license\fR" 4
 260 .IX Item "-L, --license"
 261 Display program's license.
 262 .IP "\fB\-l, \-\-newline\fR" 4
 263 .IX Item "-l, --newline"
 264 Add additional newline.
 265 .Sp
 266 \&\fBdos2unix\fR: Only \s-1DOS\s0 line breaks are changed to two Unix line breaks.
 267 In Mac mode only Mac line breaks are changed to two Unix
 268 line breaks.
 269 .Sp
 270 \&\fBunix2dos\fR: Only Unix line breaks are changed to two \s-1DOS\s0 line breaks.
 271 In Mac mode Unix line breaks are changed to two Mac line breaks.
 272 .IP "\fB\-m, \-\-add\-bom\fR" 4
 273 .IX Item "-m, --add-bom"
 274 Write a Byte Order Mark (\s-1BOM\s0) in the output file. By default an \s-1UTF\-8 BOM\s0
 275 is written.
 276 .Sp
 277 When the input file is \s-1UTF\-16,\s0 and the option \f(CW\*(C`\-u\*(C'\fR is used, an \s-1UTF\-16
 278 BOM\s0 will be written.
 279 .Sp
 280 Never use this option when the output encoding is other than \s-1UTF\-8\s0 or \s-1UTF\-16.\s0
 281 See also section \s-1UNICODE.\s0
 282 .IP "\fB\-n, \-\-newfile \s-1INFILE OUTFILE ...\s0\fR" 4
 283 .IX Item "-n, --newfile INFILE OUTFILE ..."
 284 New file mode. Convert file \s-1INFILE\s0 and write output to file \s-1OUTFILE.\s0
 285 File names must be given in pairs and wildcard names should \fInot\fR be
 286 used or you \fIwill\fR lose your files.
 287 .Sp
 288 The person who starts the conversion in new file (paired) mode will be the owner
 289 of the converted file. The read/write permissions of the new file will be the
 290 permissions of the original file minus the \fIumask\fR\|(1) of the person who runs the
 291 conversion.
 292 .IP "\fB\-o, \-\-oldfile \s-1FILE ...\s0\fR" 4
 293 .IX Item "-o, --oldfile FILE ..."
 294 Old file mode. Convert file \s-1FILE\s0 and overwrite output to it. The program
 295 defaults to run in this mode. Wildcard names may be used.
 296 .Sp
 297 In old file (in-place) mode the converted file gets the same owner, group, and
 298 read/write permissions as the original file, even when the file is converted by
 299 another user who has write permissions on the file (e.g. user root).  The
 300 conversion will be aborted when it is not possible to preserve the original
 301 values.  Change of owner could mean that the original owner is not able to read
 302 the file any more. Change of group could be a security risk, the file could be
 303 made readable for persons for whom it is not intended.  Preservation of owner,
 304 group, and read/write permissions is only supported on Unix.
 305 .IP "\fB\-q, \-\-quiet\fR" 4
 306 .IX Item "-q, --quiet"
 307 Quiet mode. Suppress all warnings and messages. The return value is zero,
 308 except when wrong command-line options are used.
 309 .IP "\fB\-r, \-\-remove\-bom\fR" 4
 310 .IX Item "-r, --remove-bom"
 311 Remove Byte Order Mark (\s-1BOM\s0). Do not write a \s-1BOM\s0 in the output file.
 312 This is the default behavior when converting to Unix line breaks.
 313 See also option \f(CW\*(C`\-b\*(C'\fR.
 314 .IP "\fB\-s, \-\-safe\fR" 4
 315 .IX Item "-s, --safe"
 316 Skip binary files (default).
 317 .IP "\fB\-u, \-\-keep\-utf16\fR" 4
 318 .IX Item "-u, --keep-utf16"
 319 Keep the original \s-1UTF\-16\s0 encoding of the input file. The output file will be
 320 written in the same \s-1UTF\-16\s0 encoding, little or big endian, as the input file.
 321 This prevents transformation to \s-1UTF\-8.\s0 An \s-1UTF\-16 BOM\s0 will be written
 322 accordingly. This option can be disabled with the \f(CW\*(C`\-ascii\*(C'\fR option.
 323 .IP "\fB\-ul, \-\-assume\-utf16le\fR" 4
 324 .IX Item "-ul, --assume-utf16le"
 325 Assume that the input file format is \s-1UTF\-16LE.\s0
 326 .Sp
 327 When there is a Byte Order Mark in the input file the \s-1BOM\s0 has priority over
 328 this option.
 329 .Sp
 330 When you made a wrong assumption (the input file was not in \s-1UTF\-16LE\s0 format) and
 331 the conversion succeeded, you will get an \s-1UTF\-8\s0 output file with wrong text.
 332 You can undo the wrong conversion with \fIiconv\fR\|(1) by converting the \s-1UTF\-8\s0 output
 333 file back to \s-1UTF\-16LE.\s0 This will bring back the original file.
 334 .Sp
 335 The assumption of \s-1UTF\-16LE\s0 works as a \fIconversion mode\fR. By switching to the default
 336 \&\fIascii\fR mode the \s-1UTF\-16LE\s0 assumption is turned off.
 337 .IP "\fB\-ub, \-\-assume\-utf16be\fR" 4
 338 .IX Item "-ub, --assume-utf16be"
 339 Assume that the input file format is \s-1UTF\-16BE.\s0
 340 .Sp
 341 This option works the same as option \f(CW\*(C`\-ul\*(C'\fR.
 342 .IP "\fB\-v, \-\-verbose\fR" 4
 343 .IX Item "-v, --verbose"
 344 Display verbose messages. Extra information is displayed about Byte Order Marks
 345 and the number of converted line breaks.
 346 .IP "\fB\-F, \-\-follow\-symlink\fR" 4
 347 .IX Item "-F, --follow-symlink"
 348 Follow symbolic links and convert the targets.
 349 .IP "\fB\-R, \-\-replace\-symlink\fR" 4
 350 .IX Item "-R, --replace-symlink"
 351 Replace symbolic links with converted files
 352 (original target files remain unchanged).
 353 .IP "\fB\-S, \-\-skip\-symlink\fR" 4
 354 .IX Item "-S, --skip-symlink"
 355 Keep symbolic links and targets unchanged (default).
 356 .IP "\fB\-V, \-\-version\fR" 4
 357 .IX Item "-V, --version"
 358 Display version information and exit.
 359 .SH "MAC MODE"
 360 .IX Header "MAC MODE"
 361 In normal mode line breaks are converted from \s-1DOS\s0 to Unix and vice versa.
 362 Mac line breaks are not converted.
 363 .PP
 364 In Mac mode line breaks are converted from Mac to Unix and vice versa. \s-1DOS\s0
 365 line breaks are not changed.
 366 .PP
 367 To run in Mac mode use the command-line option \f(CW\*(C`\-c mac\*(C'\fR or use the
 368 commands \f(CW\*(C`mac2unix\*(C'\fR or \f(CW\*(C`unix2mac\*(C'\fR.
 369 .SH "CONVERSION MODES"
 370 .IX Header "CONVERSION MODES"
 371 .IP "\fBascii\fR" 4
 372 .IX Item "ascii"
 373 In mode \f(CW\*(C`ascii\*(C'\fR only line breaks are converted. This is the default conversion
 374 mode.
 375 .Sp
 376 Although the name of this mode is \s-1ASCII,\s0 which is a 7 bit standard, the
 377 actual mode is 8 bit. Always use this mode when converting Unicode \s-1UTF\-8\s0
 378 files.
 379 .IP "\fB7bit\fR" 4
 380 .IX Item "7bit"
 381 In this mode all 8 bit non-ASCII characters (with values from 128 to 255)
 382 are converted to a 7 bit space.
 383 .IP "\fBiso\fR" 4
 384 .IX Item "iso"
 385 Characters are converted between a \s-1DOS\s0 character set (code page) and \s-1ISO\s0
 386 character set \s-1ISO\-8859\-1 \s0(Latin\-1) on Unix. \s-1DOS\s0 characters without \s-1ISO\-8859\-1\s0
 387 equivalent, for which conversion is not possible, are converted to a dot. The
 388 same counts for \s-1ISO\-8859\-1\s0 characters without \s-1DOS\s0 counterpart.
 389 .Sp
 390 When only option \f(CW\*(C`\-iso\*(C'\fR is used dos2unix will try to determine the active code
 391 page. When this is not possible dos2unix will use default code page \s-1CP437,\s0
 392 which is mainly used in the \s-1USA. \s0 To force a specific code page use options
 393 \&\f(CW\*(C`\-437\*(C'\fR (\s-1US\s0), \f(CW\*(C`\-850\*(C'\fR (Western European), \f(CW\*(C`\-860\*(C'\fR (Portuguese), \f(CW\*(C`\-863\*(C'\fR (French
 394 Canadian), or \f(CW\*(C`\-865\*(C'\fR (Nordic).  Windows code page \s-1CP1252 \s0(Western European) is
 395 also supported with option \f(CW\*(C`\-1252\*(C'\fR. For other code pages use dos2unix in
 396 combination with \fIiconv\fR\|(1).  Iconv can convert between a long list of character
 397 encodings.
 398 .Sp
 399 Never use \s-1ISO\s0 conversion on Unicode text files. It will corrupt \s-1UTF\-8\s0 encoded files.
 400 .Sp
 401 Some examples:
 402 .Sp
 403 Convert from \s-1DOS\s0 default code page to Unix Latin\-1
 404 .Sp
 405 .Vb 1
 406 \&    dos2unix \-iso \-n in.txt out.txt
 407 .Ve
 408 .Sp
 409 Convert from \s-1DOS CP850\s0 to Unix Latin\-1
 410 .Sp
 411 .Vb 1
 412 \&    dos2unix \-850 \-n in.txt out.txt
 413 .Ve
 414 .Sp
 415 Convert from Windows \s-1CP1252\s0 to Unix Latin\-1
 416 .Sp
 417 .Vb 1
 418 \&    dos2unix \-1252 \-n in.txt out.txt
 419 .Ve
 420 .Sp
 421 Convert from Windows \s-1CP1252\s0 to Unix \s-1UTF\-8 \s0(Unicode)
 422 .Sp
 423 .Vb 1
 424 \&    iconv \-f CP1252 \-t UTF\-8 in.txt | dos2unix > out.txt
 425 .Ve
 426 .Sp
 427 Convert from Unix Latin\-1 to \s-1DOS\s0 default code page
 428 .Sp
 429 .Vb 1
 430 \&    unix2dos \-iso \-n in.txt out.txt
 431 .Ve
 432 .Sp
 433 Convert from Unix Latin\-1 to \s-1DOS CP850\s0
 434 .Sp
 435 .Vb 1
 436 \&    unix2dos \-850 \-n in.txt out.txt
 437 .Ve
 438 .Sp
 439 Convert from Unix Latin\-1 to Windows \s-1CP1252\s0
 440 .Sp
 441 .Vb 1
 442 \&    unix2dos \-1252 \-n in.txt out.txt
 443 .Ve
 444 .Sp
 445 Convert from Unix \s-1UTF\-8 \s0(Unicode) to Windows \s-1CP1252\s0
 446 .Sp
 447 .Vb 1
 448 \&    unix2dos < in.txt | iconv \-f UTF\-8 \-t CP1252 > out.txt
 449 .Ve
 450 .Sp
 451 See also <http://czyborra.com/charsets/codepages.html>
 452 and <http://czyborra.com/charsets/iso8859.html>.
 453 .SH "UNICODE"
 454 .IX Header "UNICODE"
 455 .SS "Encodings"
 456 .IX Subsection "Encodings"
 457 There exist different Unicode encodings. On Unix and Linux Unicode files are
 458 typically encoded in \s-1UTF\-8\s0 encoding. On Windows Unicode text files can be
 459 encoded in \s-1UTF\-8, UTF\-16,\s0 or \s-1UTF\-16\s0 big endian, but are mostly encoded in
 460 \&\s-1UTF\-16\s0 format.
 461 .SS "Conversion"
 462 .IX Subsection "Conversion"
 463 Unicode text files can have \s-1DOS,\s0 Unix or Mac line breaks, like regular text
 464 files.
 465 .PP
 466 All versions of dos2unix and unix2dos can convert \s-1UTF\-8\s0 encoded files, because
 467 \&\s-1UTF\-8\s0 was designed for backward compatibility with \s-1ASCII.\s0
 468 .PP
 469 Dos2unix and unix2dos with Unicode \s-1UTF\-16\s0 support can read little and big
 470 endian \s-1UTF\-16\s0 encoded text files. To see if dos2unix was built with \s-1UTF\-16\s0
 471 support type \f(CW\*(C`dos2unix \-V\*(C'\fR.
 472 .PP
 473 \&\s-1UTF\-16\s0 encoded files are by default converted to \s-1UTF\-8.\s0 On Unix/Linux it is
 474 required that the locale character encoding is set to \s-1UTF\-8.\s0 Use the \fIlocale\fR\|(1)
 475 command to find out what the locale character encoding is. \s-1UTF\-8\s0 formatted
 476 text files are well supported on both Windows and Unix/Linux.
 477 .PP
 478 \&\s-1UTF\-16\s0 and \s-1UTF\-8\s0 encoding are fully compatible; no text will be lost in
 479 the conversion. When an \s-1UTF\-16\s0 to \s-1UTF\-8\s0 conversion error occurs, for instance
 480 when the \s-1UTF\-16\s0 input file contains an error, the file will be skipped.
 481 .PP
 482 When option \f(CW\*(C`\-u\*(C'\fR is used, the output file will be written in the same \s-1UTF\-16\s0
 483 encoding as the input file. Option \f(CW\*(C`\-u\*(C'\fR prevents conversion to \s-1UTF\-8.\s0
 484 .PP
 485 Dos2unix and unix2dos have no option to convert \s-1UTF\-8\s0 files to \s-1UTF\-16.\s0
 486 .PP
 487 \&\s-1ISO\s0 and 7\-bit mode conversion do not work on \s-1UTF\-16\s0 files.
 488 .SS "Byte Order Mark"
 489 .IX Subsection "Byte Order Mark"
 490 On Windows Unicode text files typically have a Byte Order Mark (\s-1BOM\s0), because
 491 many Windows programs (including Notepad) add BOMs by default. See also
 492 <http://en.wikipedia.org/wiki/Byte_order_mark>.
 493 .PP
 494 On Unix Unicode files typically don't have a \s-1BOM.\s0 It is assumed that text files
 495 are encoded in the locale character encoding.
 496 .PP
 497 Dos2unix can only detect if a file is in \s-1UTF\-16\s0 format if the file has a \s-1BOM.\s0
 498 When an \s-1UTF\-16\s0 file doesn't have a \s-1BOM,\s0 dos2unix will see the file as a binary
 499 file.
 500 .PP
 501 Use option \f(CW\*(C`\-ul\*(C'\fR or \f(CW\*(C`\-ub\*(C'\fR to convert an \s-1UTF\-16\s0 file without \s-1BOM.\s0
 502 .PP
 503 Dos2unix writes by default no \s-1BOM\s0 in the output file. With option \f(CW\*(C`\-b\*(C'\fR
 504 Dos2unix writes a \s-1BOM\s0 when the input file has a \s-1BOM.\s0
 505 .PP
 506 Unix2dos writes by default a \s-1BOM\s0 in the output file when the input file has a
 507 \&\s-1BOM.\s0 Use option \f(CW\*(C`\-r\*(C'\fR to remove the \s-1BOM.\s0
 508 .PP
 509 Dos2unix and unix2dos always write a \s-1BOM\s0 when option \f(CW\*(C`\-m\*(C'\fR is used.
 510 .SS "Unicode examples"
 511 .IX Subsection "Unicode examples"
 512 Convert from Windows \s-1UTF\-16 \s0(with \s-1BOM\s0) to Unix \s-1UTF\-8\s0
 513 .PP
 514 .Vb 1
 515 \&    dos2unix \-n in.txt out.txt
 516 .Ve
 517 .PP
 518 Convert from Windows \s-1UTF\-16LE \s0(without \s-1BOM\s0) to Unix \s-1UTF\-8\s0
 519 .PP
 520 .Vb 1
 521 \&    dos2unix \-ul \-n in.txt out.txt
 522 .Ve
 523 .PP
 524 Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-8\s0 with \s-1BOM\s0
 525 .PP
 526 .Vb 1
 527 \&    unix2dos \-m \-n in.txt out.txt
 528 .Ve
 529 .PP
 530 Convert from Unix \s-1UTF\-8\s0 to Windows \s-1UTF\-16\s0
 531 .PP
 532 .Vb 1
 533 \&    unix2dos < in.txt | iconv \-f UTF\-8 \-t UTF\-16 > out.txt
 534 .Ve
 535 .SH "EXAMPLES"
 536 .IX Header "EXAMPLES"
 537 Read input from 'stdin' and write output to 'stdout'.
 538 .PP
 539 .Vb 2
 540 \&    dos2unix
 541 \&    dos2unix \-l \-c mac
 542 .Ve
 543 .PP
 544 Convert and replace a.txt. Convert and replace b.txt.
 545 .PP
 546 .Vb 2
 547 \&    dos2unix a.txt b.txt
 548 \&    dos2unix \-o a.txt b.txt
 549 .Ve
 550 .PP
 551 Convert and replace a.txt in ascii conversion mode.
 552 .PP
 553 .Vb 1
 554 \&    dos2unix a.txt
 555 .Ve
 556 .PP
 557 Convert and replace a.txt in ascii conversion mode.
 558 Convert and replace b.txt in 7bit conversion mode.
 559 .PP
 560 .Vb 3
 561 \&    dos2unix a.txt \-c 7bit b.txt
 562 \&    dos2unix \-c ascii a.txt \-c 7bit b.txt
 563 \&    dos2unix \-ascii a.txt \-7 b.txt
 564 .Ve
 565 .PP
 566 Convert a.txt from Mac to Unix format.
 567 .PP
 568 .Vb 2
 569 \&    dos2unix \-c mac a.txt
 570 \&    mac2unix a.txt
 571 .Ve
 572 .PP
 573 Convert a.txt from Unix to Mac format.
 574 .PP
 575 .Vb 2
 576 \&    unix2dos \-c mac a.txt
 577 \&    unix2mac a.txt
 578 .Ve
 579 .PP
 580 Convert and replace a.txt while keeping original date stamp.
 581 .PP
 582 .Vb 2
 583 \&    dos2unix \-k a.txt
 584 \&    dos2unix \-k \-o a.txt
 585 .Ve
 586 .PP
 587 Convert a.txt and write to e.txt.
 588 .PP
 589 .Vb 1
 590 \&    dos2unix \-n a.txt e.txt
 591 .Ve
 592 .PP
 593 Convert a.txt and write to e.txt, keep date stamp of e.txt same as a.txt.
 594 .PP
 595 .Vb 1
 596 \&    dos2unix \-k \-n a.txt e.txt
 597 .Ve
 598 .PP
 599 Convert and replace a.txt. Convert b.txt and write to e.txt.
 600 .PP
 601 .Vb 2
 602 \&    dos2unix a.txt \-n b.txt e.txt
 603 \&    dos2unix \-o a.txt \-n b.txt e.txt
 604 .Ve
 605 .PP
 606 Convert c.txt and write to e.txt. Convert and replace a.txt.
 607 Convert and replace b.txt. Convert d.txt and write to f.txt.
 608 .PP
 609 .Vb 1
 610 \&    dos2unix \-n c.txt e.txt \-o a.txt b.txt \-n d.txt f.txt
 611 .Ve
 612 .SH "RECURSIVE CONVERSION"
 613 .IX Header "RECURSIVE CONVERSION"
 614 Use dos2unix in combination with the \fIfind\fR\|(1) and \fIxargs\fR\|(1) commands to
 615 recursively convert text files in a directory tree structure. For instance to
 616 convert all .txt files in the directory tree under the current directory type:
 617 .PP
 618 .Vb 1
 619 \&    find . \-name \*(Aq*.txt\*(Aq |xargs dos2unix
 620 .Ve
 621 .SH "LOCALIZATION"
 622 .IX Header "LOCALIZATION"
 623 .IP "\fB\s-1LANG\s0\fR" 4
 624 .IX Item "LANG"
 625 The primary language is selected with the environment variable \s-1LANG.\s0 The \s-1LANG\s0
 626 variable consists of several parts. The first part is in small letters the
 627 language code. The second is optional and is the country code in capital
 628 letters, preceded with an underscore. There is also an optional third part:
 629 character encoding, preceded with a dot. A few examples for \s-1POSIX\s0 standard type
 630 shells:
 631 .Sp
 632 .Vb 7
 633 \&    export LANG=nl               Dutch
 634 \&    export LANG=nl_NL            Dutch, The Netherlands
 635 \&    export LANG=nl_BE            Dutch, Belgium
 636 \&    export LANG=es_ES            Spanish, Spain
 637 \&    export LANG=es_MX            Spanish, Mexico
 638 \&    export LANG=en_US.iso88591   English, USA, Latin\-1 encoding
 639 \&    export LANG=en_GB.UTF\-8      English, UK, UTF\-8 encoding
 640 .Ve
 641 .Sp
 642 For a complete list of language and country codes see the gettext manual:
 643 <http://www.gnu.org/software/gettext/manual/gettext.html#Language\-Codes>
 644 .Sp
 645 On Unix systems you can use the command \fIlocale\fR\|(1) to get locale specific
 646 information.
 647 .IP "\fB\s-1LANGUAGE\s0\fR" 4
 648 .IX Item "LANGUAGE"
 649 With the \s-1LANGUAGE\s0 environment variable you can specify a priority list of
 650 languages, separated by colons. Dos2unix gives preference to \s-1LANGUAGE\s0 over \s-1LANG.\s0
 651 For instance, first Dutch and then German: \f(CW\*(C`LANGUAGE=nl:de\*(C'\fR. You have to first
 652 enable localization, by setting \s-1LANG \s0(or \s-1LC_ALL\s0) to a value other than
 653 \&\*(L"C\*(R", before you can use a language priority list through the \s-1LANGUAGE\s0
 654 variable. See also the gettext manual:
 655 <http://www.gnu.org/software/gettext/manual/gettext.html#The\-LANGUAGE\-variable>
 656 .Sp
 657 If you select a language which is not available you will get the
 658 standard English messages.
 659 .IP "\fB\s-1DOS2UNIX_LOCALEDIR\s0\fR" 4
 660 .IX Item "DOS2UNIX_LOCALEDIR"
 661 With the environment variable \s-1DOS2UNIX_LOCALEDIR\s0 the \s-1LOCALEDIR\s0 set
 662 during compilation can be overruled. \s-1LOCALEDIR\s0 is used to find the
 663 language files. The \s-1GNU\s0 default value is \f(CW\*(C`/usr/local/share/locale\*(C'\fR.
 664 Option \fB\-\-version\fR will display the \s-1LOCALEDIR\s0 that is used.
 665 .Sp
 666 Example (\s-1POSIX\s0 shell):
 667 .Sp
 668 .Vb 1
 669 \&    export DOS2UNIX_LOCALEDIR=$HOME/share/locale
 670 .Ve
 671 .SH "RETURN VALUE"
 672 .IX Header "RETURN VALUE"
 673 On success, zero is returned.  When a system error occurs the last system error will be
 674 returned. For other errors 1 is returned.
 675 .PP
 676 The return value is always zero in quiet mode, except when wrong command-line options
 677 are used.
 678 .SH "STANDARDS"
 679 .IX Header "STANDARDS"
 680 <http://en.wikipedia.org/wiki/Text_file>
 681 .PP
 682 <http://en.wikipedia.org/wiki/Carriage_return>
 683 .PP
 684 <http://en.wikipedia.org/wiki/Newline>
 685 .PP
 686 <http://en.wikipedia.org/wiki/Unicode>
 687 .SH "AUTHORS"
 688 .IX Header "AUTHORS"
 689 Benjamin Lin \- <blin@socs.uts.edu.au>,
 690 Bernd Johannes Wuebben (mac2unix mode) \- <wuebben@kde.org>,
 691 Christian Wurll (add extra newline) \- <wurll@ira.uka.de>,
 692 Erwin Waterlander \- <waterlan@xs4all.nl> (Maintainer)
 693 .PP
 694 Project page: <http://waterlan.home.xs4all.nl/dos2unix.html>
 695 .PP
 696 SourceForge page: <http://sourceforge.net/projects/dos2unix/>
 697 .SH "SEE ALSO"
 698 .IX Header "SEE ALSO"
 699 \&\fIfile\fR\|(1)
 700 \&\fIfind\fR\|(1)
 701 \&\fIiconv\fR\|(1)
 702 \&\fIlocale\fR\|(1)
 703 \&\fIxargs\fR\|(1)