src/shred.c

   1 /* shred.c - overwrite files and devices to make it harder to recover data
   2
   3    Copyright (C) 1999-2008 Free Software Foundation, Inc.
   4    Copyright (C) 1997, 1998, 1999 Colin Plumb.
   5
   6    This program is free software: you can redistribute it and/or modify
   7    it under the terms of the GNU General Public License as published by
   8    the Free Software Foundation, either version 3 of the License, or
   9    (at your option) any later version.
  10
  11    This program is distributed in the hope that it will be useful,
  12    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14    GNU General Public License for more details.
  15
  16    You should have received a copy of the GNU General Public License
  17    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18
  19    Written by Colin Plumb.  */
  20
  21 /* TODO:
  22    - use consistent non-capitalization in error messages
  23    - add standard GNU copyleft comment
  24
  25   - Add -r/-R/--recursive
  26   - Add -i/--interactive
  27   - Reserve -d
  28   - Add -L
  29   - Add an unlink-all option to emulate rm.
  30  */
  31
  32 /*
  33  * Do a more secure overwrite of given files or devices, to make it harder
  34  * for even very expensive hardware probing to recover the data.
  35  *
  36  * Although this process is also known as "wiping", I prefer the longer
  37  * name both because I think it is more evocative of what is happening and
  38  * because a longer name conveys a more appropriate sense of deliberateness.
  39  *
  40  * For the theory behind this, see "Secure Deletion of Data from Magnetic
  41  * and Solid-State Memory", on line at
  42  * http://www.cs.auckland.ac.nz/~pgut001/pubs/secure_del.html
  43  *
  44  * Just for the record, reversing one or two passes of disk overwrite
  45  * is not terribly difficult with hardware help.  Hook up a good-quality
  46  * digitizing oscilloscope to the output of the head preamplifier and copy
  47  * the high-res digitized data to a computer for some off-line analysis.
  48  * Read the "current" data and average all the pulses together to get an
  49  * "average" pulse on the disk.  Subtract this average pulse from all of
  50  * the actual pulses and you can clearly see the "echo" of the previous
  51  * data on the disk.
  52  *
  53  * Real hard drives have to balance the cost of the media, the head,
  54  * and the read circuitry.  They use better-quality media than absolutely
  55  * necessary to limit the cost of the read circuitry.  By throwing that
  56  * assumption out, and the assumption that you want the data processed
  57  * as fast as the hard drive can spin, you can do better.
  58  *
  59  * If asked to wipe a file, this also unlinks it, renaming it in a
  60  * clever way to try to leave no trace of the original filename.
  61  *
  62  * This was inspired by a desire to improve on some code titled:
  63  * Wipe V1.0-- Overwrite and delete files.  S. 2/3/96
  64  * but I've rewritten everything here so completely that no trace of
  65  * the original remains.
  66  *
  67  * Thanks to:
  68  * Bob Jenkins, for his good RNG work and patience with the FSF copyright
  69  * paperwork.
  70  * Jim Meyering, for his work merging this into the GNU fileutils while
  71  * still letting me feel a sense of ownership and pride.  Getting me to
  72  * tolerate the GNU brace style was quite a feat of diplomacy.
  73  * Paul Eggert, for lots of useful discussion and code.  I disagree with
  74  * an awful lot of his suggestions, but they're disagreements worth having.
  75  *
  76  * Things to think about:
  77  * - Security: Is there any risk to the race
  78  *   between overwriting and unlinking a file?  Will it do anything
  79  *   drastically bad if told to attack a named pipe or socket?
  80  */
  81
  82 /* The official name of this program (e.g., no `g' prefix).  */
  83 #define PROGRAM_NAME "shred"
  84
  85 #define AUTHORS proper_name ("Colin Plumb")
  86
  87 #include <config.h>
  88
  89 #include <getopt.h>
  90 #include <stdio.h>
  91 #include <assert.h>
  92 #include <setjmp.h>
  93 #include <sys/types.h>
  94
  95 #include "system.h"
  96 #include "xstrtol.h"
  97 #include "error.h"
  98 #include "fcntl--.h"
  99 #include "human.h"
 100 #include "quotearg.h"           /* For quotearg_colon */
 101 #include "randint.h"
 102 #include "randread.h"
 103
 104 /* Default number of times to overwrite; usage () prints this value as
        the default for -n/--iterations.  */
 105 enum { DEFAULT_PASSES = 25 };
 106
 107 /* How many seconds to wait before checking whether to output another
 108    verbose output line.  dopass adds this to the current time to
        schedule its next progress message.  */
 109 enum { VERBOSE_UPDATE = 5 };
 110
 111 /* Sector size and corresponding mask, for recovering after write failures.
 112    The size must be a power of 2.  fillpattern also uses SECTOR_SIZE as
        the stride when it inverts the first bit of every sector.  */
 113 enum { SECTOR_SIZE = 512 };
 114 enum { SECTOR_MASK = SECTOR_SIZE - 1 };
 115 verify (0 < SECTOR_SIZE && (SECTOR_SIZE & SECTOR_MASK) == 0);
 116
 117 struct Options
 118 {
 119   bool force;           /* -f flag: chmod files if necessary */
 120   size_t n_iterations;  /* -n flag: Number of iterations (the optional
                                final zero pass is not counted here) */
 121   off_t size;           /* -s flag: size of file; -1 makes do_wipefd
                                determine the size itself */
 122   bool remove_file;     /* -u flag: remove file after shredding */
 123   bool verbose;         /* -v flag: Print progress */
 124   bool exact;           /* -x flag: Do not round up file size */
 125   bool zero_fill;       /* -z flag: Add a final zero pass */
 126 };
 127
 128 /* For long options that have no equivalent short option, use a
 129    non-character as a pseudo short option, starting with CHAR_MAX + 1.  */
 130 enum
 131 {
 132   RANDOM_SOURCE_OPTION = CHAR_MAX + 1    /* value for --random-source */
 133 };
 134
 135 static struct option const long_opts[] =
 136 {
       /* Each entry spelled out here gives the long name, its argument
          requirement, a NULL flag pointer, and the short (or pseudo-short)
          option value associated with that long name.  */
 137   {"exact", no_argument, NULL, 'x'},
 138   {"force", no_argument, NULL, 'f'},
 139   {"iterations", required_argument, NULL, 'n'},
 140   {"size", required_argument, NULL, 's'},
 141   {"random-source", required_argument, NULL, RANDOM_SOURCE_OPTION},
 142   {"remove", no_argument, NULL, 'u'},
 143   {"verbose", no_argument, NULL, 'v'},
 144   {"zero", no_argument, NULL, 'z'},
 145   {GETOPT_HELP_OPTION_DECL},
 146   {GETOPT_VERSION_OPTION_DECL},
 147   {NULL, 0, NULL, 0}             /* all-zero entry ends the table */
 148 };
 149
 150 void
 151 usage (int status)
 152 {
 153   if (status != EXIT_SUCCESS)
 154     fprintf (stderr, _("Try `%s --help' for more information.\n"),
 155              program_name);
 156   else
 157     {
 158       printf (_("Usage: %s [OPTION]... FILE...\n"), program_name);
 159       fputs (_("\
 160 Overwrite the specified FILE(s) repeatedly, in order to make it harder\n\
 161 for even very expensive hardware probing to recover the data.\n\
 162 \n\
 163 "), stdout);
 164       fputs (_("\
 165 Mandatory arguments to long options are mandatory for short options too.\n\
 166 "), stdout);
 167       printf (_("\
 168   -f, --force    change permissions to allow writing if necessary\n\
 169   -n, --iterations=N  Overwrite N times instead of the default (%d)\n\
 170       --random-source=FILE  get random bytes from FILE (default /dev/urandom)\n\
 171   -s, --size=N   shred this many bytes (suffixes like K, M, G accepted)\n\
 172 "), DEFAULT_PASSES);
 173       fputs (_("\
 174   -u, --remove   truncate and remove file after overwriting\n\
 175   -v, --verbose  show progress\n\
 176   -x, --exact    do not round file sizes up to the next full block;\n\
 177                    this is the default for non-regular files\n\
 178   -z, --zero     add a final overwrite with zeros to hide shredding\n\
 179 "), stdout);
 180       fputs (HELP_OPTION_DESCRIPTION, stdout);
 181       fputs (VERSION_OPTION_DESCRIPTION, stdout);
 182       fputs (_("\
 183 \n\
 184 If FILE is -, shred standard output.\n\
 185 \n\
 186 Delete FILE(s) if --remove (-u) is specified.  The default is not to remove\n\
 187 the files because it is common to operate on device files like /dev/hda,\n\
 188 and those files usually should not be removed.  When operating on regular\n\
 189 files, most people use the --remove option.\n\
 190 \n\
 191 "), stdout);
 192       fputs (_("\
 193 CAUTION: Note that shred relies on a very important assumption:\n\
 194 that the file system overwrites data in place.  This is the traditional\n\
 195 way to do things, but many modern file system designs do not satisfy this\n\
 196 assumption.  The following are examples of file systems on which shred is\n\
 197 not effective, or is not guaranteed to be effective in all file system modes:\n\
 198 \n\
 199 "), stdout);
 200       fputs (_("\
 201 * log-structured or journaled file systems, such as those supplied with\n\
 202 AIX and Solaris (and JFS, ReiserFS, XFS, Ext3, etc.)\n\
 203 \n\
 204 * file systems that write redundant data and carry on even if some writes\n\
 205 fail, such as RAID-based file systems\n\
 206 \n\
 207 * file systems that make snapshots, such as Network Appliance's NFS server\n\
 208 \n\
 209 "), stdout);
 210       fputs (_("\
 211 * file systems that cache in temporary locations, such as NFS\n\
 212 version 3 clients\n\
 213 \n\
 214 * compressed file systems\n\
 215 \n\
 216 "), stdout);
 217       fputs (_("\
 218 In the case of ext3 file systems, the above disclaimer applies\n\
 219 (and shred is thus of limited effectiveness) only in data=journal mode,\n\
 220 which journals file data in addition to just metadata.  In both the\n\
 221 data=ordered (default) and data=writeback modes, shred works as usual.\n\
 222 Ext3 journaling modes can be changed by adding the data=something option\n\
 223 to the mount options for a particular file system in the /etc/fstab file,\n\
 224 as documented in the mount man page (man mount).\n\
 225 \n\
 226 "), stdout);
 227       fputs (_("\
 228 In addition, file system backups and remote mirrors may contain copies\n\
 229 of the file that cannot be removed, and that will allow a shredded file\n\
 230 to be recovered later.\n\
 231 "), stdout);
 232       emit_bug_reporting_address ();
 233     }
 234   exit (status);
 235 }
 236
 237
 238 /*
 239  * Fill a buffer with a fixed pattern.
 240  *
 241  * The buffer must be at least 3 bytes long, even if
 242  * size is less.  Larger sizes are filled exactly.
 243  */
 244 static void
 245 fillpattern (int type, unsigned char *r, size_t size)
 246 {
 247   size_t i;
 248   unsigned int bits = type & 0xfff;
 249
 250   bits |= bits << 12;
 251   r[0] = (bits >> 4) & 255;
 252   r[1] = (bits >> 8) & 255;
 253   r[2] = bits & 255;
 254   for (i = 3; i < size / 2; i *= 2)
 255     memcpy (r + i, r, i);
 256   if (i < size)
 257     memcpy (r + i, r, size - i);
 258
 259   /* Invert the first bit of every sector. */
 260   if (type & 0x1000)
 261     for (i = 0; i < size; i += SECTOR_SIZE)
 262       r[i] ^= 0x80;
 263 }
 264
 265 /*
 266  * Generate a 6-character (+ nul) pass name string
 267  * FIXME: allow translation of "random".
 268  */
 269 #define PASS_NAME_SIZE 7
 270 static void
 271 passname (unsigned char const *data, char name[PASS_NAME_SIZE])
 272 {
 273   if (data)
 274     sprintf (name, "%02x%02x%02x", data[0], data[1], data[2]);
 275   else
 276     memcpy (name, "random", PASS_NAME_SIZE);
 277 }
 278
 279 /* Return true when it's ok to ignore an fsync or fdatasync
 280    failure that set errno to ERRNO_VAL.  */
 281 static bool
 282 ignorable_sync_errno (int errno_val)
 283 {
 284   return (errno_val == EINVAL
 285           || errno_val == EBADF
 286           /* HP-UX does this */
 287           || errno_val == EISDIR);
 288 }
 289
 290 /* Request that all data for FD be transferred to the corresponding
 291    storage device.  QNAME is the file name (quoted for colons).
 292    Report any errors found.  Return 0 on success, -1
 293    (setting errno) on failure.  It is not an error if fdatasync and/or
 294    fsync is not supported for this file, or if the file is not a
 295    writable file descriptor.  */
 296 static int
 297 dosync (int fd, char const *qname)
 298 {
 299   int err;
 300
 301 #if HAVE_FDATASYNC
 302   if (fdatasync (fd) == 0)
 303     return 0;
 304   err = errno;
 305   if ( ! ignorable_sync_errno (err))
 306     {
 307       error (0, err, _("%s: fdatasync failed"), qname);
 308       errno = err;
 309       return -1;
 310     }
 311 #endif
 312
 313   if (fsync (fd) == 0)
 314     return 0;
 315   err = errno;
 316   if ( ! ignorable_sync_errno (err))
 317     {
 318       error (0, err, _("%s: fsync failed"), qname);
 319       errno = err;
 320       return -1;
 321     }
 322
 323   sync ();
 324   return 0;
 325 }
 326
 327 /* Turn on or off direct I/O mode for file descriptor FD, if possible.
 328    Try to turn it on if ENABLE is true.  Otherwise, try to turn it off.  */
 329 static void
 330 direct_mode (int fd, bool enable)
 331 {
 332   if (O_DIRECT)
 333     {
 334       int fd_flags = fcntl (fd, F_GETFL);
 335       if (0 < fd_flags)
 336         {
 337           int new_flags = (enable
 338                            ? (fd_flags | O_DIRECT)
 339                            : (fd_flags & ~O_DIRECT));
 340           if (new_flags != fd_flags)
 341             fcntl (fd, F_SETFL, new_flags);
 342         }
 343     }
 344
 345 #if HAVE_DIRECTIO && defined DIRECTIO_ON && defined DIRECTIO_OFF
 346   /* This is Solaris-specific.  See the following for details:
 347      http://docs.sun.com/db/doc/816-0213/6m6ne37so?q=directio&a=view  */
 348   directio (fd, enable ? DIRECTIO_ON : DIRECTIO_OFF);
 349 #endif
 350 }
 351
 352 /*
 353  * Do pass number k of n, writing "size" bytes of the given pattern "type"
 354  * to the file descriptor fd.   Qname, k and n are passed in only for verbose
 355  * progress message purposes.  If n == 0, no progress messages are printed.
 356  *
 357  * If *sizep == -1, the size is unknown, and it will be filled in as soon
 358  * as writing fails.
 359  *
 360  * Return 1 on write error, -1 on other error, 0 on success.
 361  */
 362 static int
 363 dopass (int fd, char const *qname, off_t *sizep, int type,
 364         struct randread_source *s, unsigned long int k, unsigned long int n)
 365 {
 366   off_t size = *sizep;
 367   off_t offset;                 /* Current file posiiton */
 368   time_t thresh IF_LINT (= 0);  /* Time to maybe print next status update */
 369   time_t now = 0;               /* Current time */
 370   size_t lim;                   /* Amount of data to try writing */
 371   size_t soff;                  /* Offset into buffer for next write */
 372   ssize_t ssize;                /* Return value from write */
 373
 374   /* Fill pattern buffer.  Aligning it to a 32-bit boundary speeds up randread
 375      in some cases.  */
 376   typedef uint32_t fill_pattern_buffer[3 * 1024];
 377   union
 378   {
 379     fill_pattern_buffer buffer;
 380     char c[sizeof (fill_pattern_buffer)];
 381     unsigned char u[sizeof (fill_pattern_buffer)];
 382   } r;
 383
 384   off_t sizeof_r = sizeof r;
 385   char pass_string[PASS_NAME_SIZE];     /* Name of current pass */
 386   bool write_error = false;
 387   bool first_write = true;
 388
 389   /* Printable previous offset into the file */
 390   char previous_offset_buf[LONGEST_HUMAN_READABLE + 1];
 391   char const *previous_human_offset IF_LINT (= 0);
 392
 393   if (lseek (fd, 0, SEEK_SET) == -1)
 394     {
 395       error (0, errno, _("%s: cannot rewind"), qname);
 396       return -1;
 397     }
 398
 399   /* Constant fill patterns need only be set up once. */
 400   if (type >= 0)
 401     {
 402       lim = (0 <= size && size < sizeof_r ? size : sizeof r);
 403       fillpattern (type, r.u, lim);
 404       passname (r.u, pass_string);
 405     }
 406   else
 407     {
 408       passname (0, pass_string);
 409     }
 410
 411   /* Set position if first status update */
 412   if (n)
 413     {
 414       error (0, 0, _("%s: pass %lu/%lu (%s)..."), qname, k, n, pass_string);
 415       thresh = time (NULL) + VERBOSE_UPDATE;
 416       previous_human_offset = "";
 417     }
 418
 419   offset = 0;
 420   for (;;)
 421     {
 422       /* How much to write this time? */
 423       lim = sizeof r;
 424       if (0 <= size && size - offset < sizeof_r)
 425         {
 426           if (size < offset)
 427             break;
 428           lim = size - offset;
 429           if (!lim)
 430             break;
 431         }
 432       if (type < 0)
 433         randread (s, &r, lim);
 434       /* Loop to retry partial writes. */
 435       for (soff = 0; soff < lim; soff += ssize, first_write = false)
 436         {
 437           ssize = write (fd, r.c + soff, lim - soff);
 438           if (ssize <= 0)
 439             {
 440               if (size < 0 && (ssize == 0 || errno == ENOSPC))
 441                 {
 442                   /* Ah, we have found the end of the file */
 443                   *sizep = size = offset + soff;
 444                   break;
 445                 }
 446               else
 447                 {
 448                   int errnum = errno;
 449                   char buf[INT_BUFSIZE_BOUND (uintmax_t)];
 450
 451                   /* If the first write of the first pass for a given file
 452                      has just failed with EINVAL, turn off direct mode I/O
 453                      and try again.  This works around a bug in linux-2.4
 454                      whereby opening with O_DIRECT would succeed for some
 455                      file system types (e.g., ext3), but any attempt to
 456                      access a file through the resulting descriptor would
 457                      fail with EINVAL.  */
 458                   if (k == 1 && first_write && errno == EINVAL)
 459                     {
 460                       direct_mode (fd, false);
 461                       ssize = 0;
 462                       continue;
 463                     }
 464                   error (0, errnum, _("%s: error writing at offset %s"),
 465                          qname, umaxtostr (offset + soff, buf));
 466
 467                   /* 'shred' is often used on bad media, before throwing it
 468                      out.  Thus, it shouldn't give up on bad blocks.  This
 469                      code works because lim is always a multiple of
 470                      SECTOR_SIZE, except at the end.  */
 471                   verify (sizeof r % SECTOR_SIZE == 0);
 472                   if (errnum == EIO && 0 <= size && (soff | SECTOR_MASK) < lim)
 473                     {
 474                       size_t soff1 = (soff | SECTOR_MASK) + 1;
 475                       if (lseek (fd, offset + soff1, SEEK_SET) != -1)
 476                         {
 477                           /* Arrange to skip this block. */
 478                           ssize = soff1 - soff;
 479                           write_error = true;
 480                           continue;
 481                         }
 482                       error (0, errno, _("%s: lseek failed"), qname);
 483                     }
 484                   return -1;
 485                 }
 486             }
 487         }
 488
 489       /* Okay, we have written "soff" bytes. */
 490
 491       if (offset + soff < offset)
 492         {
 493           error (0, 0, _("%s: file too large"), qname);
 494           return -1;
 495         }
 496
 497       offset += soff;
 498
 499       /* Time to print progress? */
 500       if (n
 501           && ((offset == size && *previous_human_offset)
 502               || thresh <= (now = time (NULL))))
 503         {
 504           char offset_buf[LONGEST_HUMAN_READABLE + 1];
 505           char size_buf[LONGEST_HUMAN_READABLE + 1];
 506           int human_progress_opts = (human_autoscale | human_SI
 507                                      | human_base_1024 | human_B);
 508           char const *human_offset
 509             = human_readable (offset, offset_buf,
 510                               human_floor | human_progress_opts, 1, 1);
 511
 512           if (offset == size
 513               || !STREQ (previous_human_offset, human_offset))
 514             {
 515               if (size < 0)
 516                 error (0, 0, _("%s: pass %lu/%lu (%s)...%s"),
 517                        qname, k, n, pass_string, human_offset);
 518               else
 519                 {
 520                   uintmax_t off = offset;
 521                   int percent = (size == 0
 522                                  ? 100
 523                                  : (off <= TYPE_MAXIMUM (uintmax_t) / 100
 524                                     ? off * 100 / size
 525                                     : off / (size / 100)));
 526                   char const *human_size
 527                     = human_readable (size, size_buf,
 528                                       human_ceiling | human_progress_opts,
 529                                       1, 1);
 530                   if (offset == size)
 531                     human_offset = human_size;
 532                   error (0, 0, _("%s: pass %lu/%lu (%s)...%s/%s %d%%"),
 533                          qname, k, n, pass_string, human_offset, human_size,
 534                          percent);
 535                 }
 536
 537               strcpy (previous_offset_buf, human_offset);
 538               previous_human_offset = previous_offset_buf;
 539               thresh = now + VERBOSE_UPDATE;
 540
 541               /*
 542                * Force periodic syncs to keep displayed progress accurate
 543                * FIXME: Should these be present even if -v is not enabled,
 544                * to keep the buffer cache from filling with dirty pages?
 545                * It's a common problem with programs that do lots of writes,
 546                * like mkfs.
 547                */
 548               if (dosync (fd, qname) != 0)
 549                 {
 550                   if (errno != EIO)
 551                     return -1;
 552                   write_error = true;
 553                 }
 554             }
 555         }
 556     }
 557
 558   /* Force what we just wrote to hit the media. */
 559   if (dosync (fd, qname) != 0)
 560     {
 561       if (errno != EIO)
 562         return -1;
 563       write_error = true;
 564     }
 565
 566   return write_error;
 567 }
 568
 569 /*
 570  * The passes start and end with a random pass, and the passes in between
 571  * are done in random order.  The idea is to deprive someone trying to
 572  * reverse the process of knowledge of the overwrite patterns, so they
 573  * have the additional step of figuring out what was done to the disk
 574  * before they can try to reverse or cancel it.
 575  *
 576  * First, all possible 1-bit patterns.  There are two of them.
 577  * Then, all possible 2-bit patterns.  There are four, but the two
 578  * which are also 1-bit patterns can be omitted.
 579  * Then, all possible 3-bit patterns.  Likewise, 8-2 = 6.
 580  * Then, all possible 4-bit patterns.  16-4 = 12.
 581  *
 582  * The basic passes are:
 583  * 1-bit: 0x000, 0xFFF
 584  * 2-bit: 0x555, 0xAAA
 585  * 3-bit: 0x249, 0x492, 0x924, 0x6DB, 0xB6D, 0xDB6 (+ 1-bit)
 586  *        100100100100         110110110110
 587  *           9   2   4            D   B   6
 588  * 4-bit: 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
 589  *        0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE (+ 1-bit, 2-bit)
 590  * Adding three random passes at the beginning, middle and end
 591  * produces the default 25-pass structure.
 592  *
 593  * The next extension would be to 5-bit and 6-bit patterns.
 594  * There are 30 uncovered 5-bit patterns and 64-8-2 = 46 uncovered
 595  * 6-bit patterns, so they would increase the time required
 596  * significantly.  4-bit patterns are enough for most purposes.
 597  *
 598  * The main gotcha is that this would require a trickier encoding,
 599  * since lcm(2,3,4) = 12 bits is easy to fit into an int, but
 600  * lcm(2,3,4,5) = 60 bits is not.
 601  *
 602  * One extension that is included is to complement the first bit in each
 603  * 512-byte block, to alter the phase of the encoded data in the more
 604  * complex encodings.  This doesn't apply to MFM, so the 1-bit patterns
 605  * are considered part of the 3-bit ones and the 2-bit patterns are
 606  * considered part of the 4-bit patterns.
 607  *
 608  *
 609  * How does the generalization to variable numbers of passes work?
 610  *
 611  * Here's how...
 612  * Have an ordered list of groups of passes.  Each group is a set.
 613  * Take as many groups as will fit, plus a random subset of the
 614  * last partial group, and place them into the passes list.
 615  * Then shuffle the passes list into random order and use that.
 616  *
 617  * One extra detail: if we can't include a large enough fraction of the
 618  * last group to be interesting, then just substitute random passes.
 619  *
 620  * If you want more passes than the entire list of groups can
 621  * provide, just start repeating from the beginning of the list.
 622  */
 623 static int const
 624   patterns[] =
 625 {
       /* Each group begins with a descriptor word K: K > 0 is followed by
          K fixed pattern words, K < 0 requests -K random passes, and 0
          ends the table (genpattern then starts over at the beginning).
          A pattern word with bit 0x1000 set makes fillpattern invert the
          first bit of every sector.  */
 626   -2,                           /* 2 random passes */
 627   2, 0x000, 0xFFF,              /* 1-bit */
 628   2, 0x555, 0xAAA,              /* 2-bit */
 629   -1,                           /* 1 random pass */
 630   6, 0x249, 0x492, 0x6DB, 0x924, 0xB6D, 0xDB6,  /* 3-bit */
 631   12, 0x111, 0x222, 0x333, 0x444, 0x666, 0x777,
 632   0x888, 0x999, 0xBBB, 0xCCC, 0xDDD, 0xEEE,     /* 4-bit */
 633   -1,                           /* 1 random pass */
 634         /* The following patterns have the first bit per block flipped */
 635   8, 0x1000, 0x1249, 0x1492, 0x16DB, 0x1924, 0x1B6D, 0x1DB6, 0x1FFF,
 636   14, 0x1111, 0x1222, 0x1333, 0x1444, 0x1555, 0x1666, 0x1777,
 637   0x1888, 0x1999, 0x1AAA, 0x1BBB, 0x1CCC, 0x1DDD, 0x1EEE,
 638   -1,                           /* 1 random pass */
 639   0                             /* End */
 640 };
 641
 642 /*
 643  * Generate a random wiping pass pattern with num passes.
 644  * This is a two-stage process.  First, the passes to include
 645  * are chosen, and then they are shuffled into the desired
 646  * order.
 647  */
 648 static void
 649 genpattern (int *dest, size_t num, struct randint_source *s)
 650 {
 651   size_t randpasses;
 652   int const *p;
 653   int *d;
 654   size_t n;
 655   size_t accum, top, swap;
 656   int k;
 657
 658   if (!num)
 659     return;
 660
 661   /* Stage 1: choose the passes to use */
 662   p = patterns;
 663   randpasses = 0;
 664   d = dest;                     /* Destination for generated pass list */
 665   n = num;                      /* Passes remaining to fill */
 666
 667   for (;;)
 668     {
 669       k = *p++;                 /* Block descriptor word */
 670       if (!k)
 671         {                       /* Loop back to the beginning */
 672           p = patterns;
 673         }
 674       else if (k < 0)
 675         {                       /* -k random passes */
 676           k = -k;
 677           if ((size_t) k >= n)
 678             {
 679               randpasses += n;
 680               n = 0;
 681               break;
 682             }
 683           randpasses += k;
 684           n -= k;
 685         }
 686       else if ((size_t) k <= n)
 687         {                       /* Full block of patterns */
 688           memcpy (d, p, k * sizeof (int));
 689           p += k;
 690           d += k;
 691           n -= k;
 692         }
 693       else if (n < 2 || 3 * n < (size_t) k)
 694         {                       /* Finish with random */
 695           randpasses += n;
 696           break;
 697         }
 698       else
 699         {                       /* Pad out with k of the n available */
 700           do
 701             {
 702               if (n == (size_t) k || randint_choose (s, k) < n)
 703                 {
 704                   *d++ = *p;
 705                   n--;
 706                 }
 707               p++;
 708             }
 709           while (n);
 710           break;
 711         }
 712     }
 713   top = num - randpasses;       /* Top of initialized data */
 714   /* assert (d == dest+top); */
 715
 716   /*
 717    * We now have fixed patterns in the dest buffer up to
 718    * "top", and we need to scramble them, with "randpasses"
 719    * random passes evenly spaced among them.
 720    *
 721    * We want one at the beginning, one at the end, and
 722    * evenly spaced in between.  To do this, we basically
 723    * use Bresenham's line draw (a.k.a DDA) algorithm
 724    * to draw a line with slope (randpasses-1)/(num-1).
 725    * (We use a positive accumulator and count down to
 726    * do this.)
 727    *
 728    * So for each desired output value, we do the following:
 729    * - If it should be a random pass, copy the pass type
 730    *   to top++, out of the way of the other passes, and
 731    *   set the current pass to -1 (random).
 732    * - If it should be a normal pattern pass, choose an
 733    *   entry at random between here and top-1 (inclusive)
 734    *   and swap the current entry with that one.
 735    */
 736   randpasses--;                 /* To speed up later math */
 737   accum = randpasses;           /* Bresenham DDA accumulator */
 738   for (n = 0; n < num; n++)
 739     {
 740       if (accum <= randpasses)
 741         {
 742           accum += num - 1;
 743           dest[top++] = dest[n];
 744           dest[n] = -1;
 745         }
 746       else
 747         {
 748           swap = n + randint_choose (s, top - n);
 749           k = dest[n];
 750           dest[n] = dest[swap];
 751           dest[swap] = k;
 752         }
 753       accum -= randpasses;
 754     }
 755   /* assert (top == num); */
 756 }
 757
 758 /*
 759  * The core routine to actually do the work.  This overwrites the first
 760  * size bytes of the given fd.  Return true if successful.
 761  */
 762 static bool
 763 do_wipefd (int fd, char const *qname, struct randint_source *s,
 764            struct Options const *flags)
 765 {
 766   size_t i;
 767   struct stat st;
 768   off_t size;                   /* Size to write, size to read */
 769   unsigned long int n;          /* Number of passes for printing purposes */
 770   int *passarray;
 771   bool ok = true;
 772   struct randread_source *rs;
 773
 774   n = 0;                /* dopass takes n -- 0 to mean "don't print progress" */
 775   if (flags->verbose)
 776     n = flags->n_iterations + flags->zero_fill;
 777
 778   if (fstat (fd, &st))
 779     {
 780       error (0, errno, _("%s: fstat failed"), qname);
 781       return false;
 782     }
 783
 784   /* If we know that we can't possibly shred the file, give up now.
 785      Otherwise, we may go into a infinite loop writing data before we
 786      find that we can't rewind the device.  */
 787   if ((S_ISCHR (st.st_mode) && isatty (fd))
 788       || S_ISFIFO (st.st_mode)
 789       || S_ISSOCK (st.st_mode))
 790     {
 791       error (0, 0, _("%s: invalid file type"), qname);
 792       return false;
 793     }
 794
 795   direct_mode (fd, true);
 796
 797   /* Allocate pass array */
 798   passarray = xnmalloc (flags->n_iterations, sizeof *passarray);
 799
 800   size = flags->size;
 801   if (size == -1)
 802     {
 803       /* Accept a length of zero only if it's a regular file.
 804          For any other type of file, try to get the size another way.  */
 805       if (S_ISREG (st.st_mode))
 806         {
 807           size = st.st_size;
 808           if (size < 0)
 809             {
 810               error (0, 0, _("%s: file has negative size"), qname);
 811               return false;
 812             }
 813         }
 814       else
 815         {
 816           size = lseek (fd, 0, SEEK_END);
 817           if (size <= 0)
 818             {
 819               /* We are unable to determine the length, up front.
 820                  Let dopass do that as part of its first iteration.  */
 821               size = -1;
 822             }
 823         }
 824
 825       /* Allow `rounding up' only for regular files.  */
 826       if (0 <= size && !(flags->exact) && S_ISREG (st.st_mode))
 827         {
 828           size += ST_BLKSIZE (st) - 1 - (size - 1) % ST_BLKSIZE (st);
 829
 830           /* If in rounding up, we've just overflowed, use the maximum.  */
 831           if (size < 0)
 832             size = TYPE_MAXIMUM (off_t);
 833         }
 834     }
 835
 836   /* Schedule the passes in random order. */
 837   genpattern (passarray, flags->n_iterations, s);
 838
 839   rs = randint_get_source (s);
 840
 841   /* Do the work */
 842   for (i = 0; i < flags->n_iterations; i++)
 843     {
 844       int err = dopass (fd, qname, &size, passarray[i], rs, i + 1, n);
 845       if (err)
 846         {
 847           if (err < 0)
 848             {
 849               memset (passarray, 0, flags->n_iterations * sizeof (int));
 850               free (passarray);
 851               return false;
 852             }
 853           ok = false;
 854         }
 855     }
 856
 857   memset (passarray, 0, flags->n_iterations * sizeof (int));
 858   free (passarray);
 859
 860   if (flags->zero_fill)
 861     {
 862       int err = dopass (fd, qname, &size, 0, rs, flags->n_iterations + 1, n);
 863       if (err)
 864         {
 865           if (err < 0)
 866             return false;
 867           ok = false;
 868         }
 869     }
 870
 871   /* Okay, now deallocate the data.  The effect of ftruncate on
 872      non-regular files is unspecified, so don't worry about any
 873      errors reported for them.  */
 874   if (flags->remove_file && ftruncate (fd, 0) != 0
 875       && S_ISREG (st.st_mode))
 876     {
 877       error (0, errno, _("%s: error truncating"), qname);
 878       return false;
 879     }
 880
 881   return ok;
 882 }
 883
 884 /* A wrapper with a little more checking for fds on the command line */
 885 static bool
 886 wipefd (int fd, char const *qname, struct randint_source *s,
 887         struct Options const *flags)
 888 {
 889   int fd_flags = fcntl (fd, F_GETFL);
 890
 891   if (fd_flags < 0)
 892     {
 893       error (0, errno, _("%s: fcntl failed"), qname);
 894       return false;
 895     }
 896   if (fd_flags & O_APPEND)
 897     {
 898       error (0, 0, _("%s: cannot shred append-only file descriptor"), qname);
 899       return false;
 900     }
 901   return do_wipefd (fd, qname, s, flags);
 902 }
 903
 904 /* --- Name-wiping code --- */
 905
 906 /* Characters allowed in a file name - a safe universal set.  */
 907 static char const nameset[] =
 908 "0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_.";
 909
 910 /* Increment NAME (with LEN bytes).  NAME must be a big-endian base N
 911    number with the digits taken from nameset.  Return true if
 912    successful if not (because NAME already has the greatest possible
 913    value.  */
 914
 915 static bool
 916 incname (char *name, size_t len)
 917 {
 918   while (len--)
 919     {
 920       char const *p = strchr (nameset, name[len]);
 921
 922       /* If this character has a successor, use it.  */
 923       if (p[1])
 924         {
 925           name[len] = p[1];
 926           return true;
 927         }
 928
 929       /* Otherwise, set this digit to 0 and increment the prefix.  */
 930       name[len] = nameset[0];
 931     }
 932
 933   return false;
 934 }
 935
 936 /*
 937  * Repeatedly rename a file with shorter and shorter names,
 938  * to obliterate all traces of the file name on any system that
 939  * adds a trailing delimiter to on-disk file names and reuses
 940  * the same directory slot.  Finally, unlink it.
 941  * The passed-in filename is modified in place to the new filename.
 942  * (Which is unlinked if this function succeeds, but is still present if
 943  * it fails for some reason.)
 944  *
 945  * The main loop is written carefully to not get stuck if all possible
 946  * names of a given length are occupied.  It counts down the length from
 947  * the original to 0.  While the length is non-zero, it tries to find an
 948  * unused file name of the given length.  It continues until either the
 949  * name is available and the rename succeeds, or it runs out of names
 950  * to try (incname wraps and returns 1).  Finally, it unlinks the file.
 951  *
 952  * The unlink is Unix-specific, as ANSI-standard remove has more
 953  * portability problems with C libraries making it "safe".  rename
 954  * is ANSI-standard.
 955  *
 956  * To force the directory data out, we try to open the directory and
 957  * invoke fdatasync and/or fsync on it.  This is non-standard, so don't
 958  * insist that it works: just fall back to a global sync in that case.
 959  * This is fairly significantly Unix-specific.  Of course, on any
 960  * file system with synchronous metadata updates, this is unnecessary.
 961  */
 962 static bool
 963 wipename (char *oldname, char const *qoldname, struct Options const *flags)
 964 {
 965   char *newname = xstrdup (oldname);
 966   char *base = last_component (newname);
 967   size_t len = base_len (base);
 968   char *dir = dir_name (newname);
 969   char *qdir = xstrdup (quotearg_colon (dir));
 970   bool first = true;
 971   bool ok = true;
 972
 973   int dir_fd = open (dir, O_RDONLY | O_DIRECTORY | O_NOCTTY | O_NONBLOCK);
 974
 975   if (flags->verbose)
 976     error (0, 0, _("%s: removing"), qoldname);
 977
 978   while (len)
 979     {
 980       memset (base, nameset[0], len);
 981       base[len] = 0;
 982       do
 983         {
 984           struct stat st;
 985           if (lstat (newname, &st) < 0)
 986             {
 987               if (rename (oldname, newname) == 0)
 988                 {
 989                   if (0 <= dir_fd && dosync (dir_fd, qdir) != 0)
 990                     ok = false;
 991                   if (flags->verbose)
 992                     {
 993                       /*
 994                        * People seem to understand this better than talking
 995                        * about renaming oldname.  newname doesn't need
 996                        * quoting because we picked it.  oldname needs to
 997                        * be quoted only the first time.
 998                        */
 999                       char const *old = (first ? qoldname : oldname);
1000                       error (0, 0, _("%s: renamed to %s"), old, newname);
1001                       first = false;
1002                     }
1003                   memcpy (oldname + (base - newname), base, len + 1);
1004                   break;
1005                 }
1006               else
1007                 {
1008                   /* The rename failed: give up on this length.  */
1009                   break;
1010                 }
1011             }
1012           else
1013             {
1014               /* newname exists, so increment BASE so we use another */
1015             }
1016         }
1017       while (incname (base, len));
1018       len--;
1019     }
1020   if (unlink (oldname) != 0)
1021     {
1022       error (0, errno, _("%s: failed to remove"), qoldname);
1023       ok = false;
1024     }
1025   else if (flags->verbose)
1026     error (0, 0, _("%s: removed"), qoldname);
1027   if (0 <= dir_fd)
1028     {
1029       if (dosync (dir_fd, qdir) != 0)
1030         ok = false;
1031       if (close (dir_fd) != 0)
1032         {
1033           error (0, errno, _("%s: failed to close"), qdir);
1034           ok = false;
1035         }
1036     }
1037   free (newname);
1038   free (dir);
1039   free (qdir);
1040   return ok;
1041 }
1042
1043 /*
1044  * Finally, the function that actually takes a filename and grinds
1045  * it into hamburger.
1046  *
1047  * FIXME
1048  * Detail to note: since we do not restore errno to EACCES after
1049  * a failed chmod, we end up printing the error code from the chmod.
1050  * This is actually the error that stopped us from proceeding, so
1051  * it's arguably the right one, and in practice it'll be either EACCES
1052  * again or EPERM, which both give similar error messages.
1053  * Does anyone disagree?
1054  */
1055 static bool
1056 wipefile (char *name, char const *qname,
1057           struct randint_source *s, struct Options const *flags)
1058 {
1059   bool ok;
1060   int fd;
1061
1062   fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1063   if (fd < 0
1064       && (errno == EACCES && flags->force)
1065       && chmod (name, S_IWUSR) == 0)
1066     fd = open (name, O_WRONLY | O_NOCTTY | O_BINARY);
1067   if (fd < 0)
1068     {
1069       error (0, errno, _("%s: failed to open for writing"), qname);
1070       return false;
1071     }
1072
1073   ok = do_wipefd (fd, qname, s, flags);
1074   if (close (fd) != 0)
1075     {
1076       error (0, errno, _("%s: failed to close"), qname);
1077       ok = false;
1078     }
1079   if (ok && flags->remove_file)
1080     ok = wipename (name, qname, flags);
1081   return ok;
1082 }
1083
1084
/* Source of random numbers shared by every operand shredded in this
   run.  It is created in main and released by clear_random_data.  */
static struct randint_source *randint_source;

/* atexit handler registered by main.  Just on general principles,
   dispose of the random source so that buffers holding information
   possibly related to the (pseudo)random values used during shredding
   do not linger.  */
static void
clear_random_data (void)
{
  randint_all_free (randint_source);
}
1096
1097
1098 int
1099 main (int argc, char **argv)
1100 {
1101   bool ok = true;
1102   struct Options flags = { 0, };
1103   char **file;
1104   int n_files;
1105   int c;
1106   int i;
1107   char const *random_source = NULL;
1108
1109   initialize_main (&argc, &argv);
1110   set_program_name (argv[0]);
1111   setlocale (LC_ALL, "");
1112   bindtextdomain (PACKAGE, LOCALEDIR);
1113   textdomain (PACKAGE);
1114
1115   atexit (close_stdout);
1116
1117   flags.n_iterations = DEFAULT_PASSES;
1118   flags.size = -1;
1119
1120   while ((c = getopt_long (argc, argv, "fn:s:uvxz", long_opts, NULL)) != -1)
1121     {
1122       switch (c)
1123         {
1124         case 'f':
1125           flags.force = true;
1126           break;
1127
1128         case 'n':
1129           {
1130             uintmax_t tmp;
1131             if (xstrtoumax (optarg, NULL, 10, &tmp, NULL) != LONGINT_OK
1132                 || MIN (UINT32_MAX, SIZE_MAX / sizeof (int)) < tmp)
1133               {
1134                 error (EXIT_FAILURE, 0, _("%s: invalid number of passes"),
1135                        quotearg_colon (optarg));
1136               }
1137             flags.n_iterations = tmp;
1138           }
1139           break;
1140
1141         case RANDOM_SOURCE_OPTION:
1142           if (random_source && !STREQ (random_source, optarg))
1143             error (EXIT_FAILURE, 0, _("multiple random sources specified"));
1144           random_source = optarg;
1145           break;
1146
1147         case 'u':
1148           flags.remove_file = true;
1149           break;
1150
1151         case 's':
1152           {
1153             uintmax_t tmp;
1154             if (xstrtoumax (optarg, NULL, 0, &tmp, "cbBkKMGTPEZY0")
1155                 != LONGINT_OK)
1156               {
1157                 error (EXIT_FAILURE, 0, _("%s: invalid file size"),
1158                        quotearg_colon (optarg));
1159               }
1160             flags.size = tmp;
1161           }
1162           break;
1163
1164         case 'v':
1165           flags.verbose = true;
1166           break;
1167
1168         case 'x':
1169           flags.exact = true;
1170           break;
1171
1172         case 'z':
1173           flags.zero_fill = true;
1174           break;
1175
1176         case_GETOPT_HELP_CHAR;
1177
1178         case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
1179
1180         default:
1181           usage (EXIT_FAILURE);
1182         }
1183     }
1184
1185   file = argv + optind;
1186   n_files = argc - optind;
1187
1188   if (n_files == 0)
1189     {
1190       error (0, 0, _("missing file operand"));
1191       usage (EXIT_FAILURE);
1192     }
1193
1194   randint_source = randint_all_new (random_source, SIZE_MAX);
1195   if (! randint_source)
1196     error (EXIT_FAILURE, errno, "%s", quotearg_colon (random_source));
1197   atexit (clear_random_data);
1198
1199   for (i = 0; i < n_files; i++)
1200     {
1201       char *qname = xstrdup (quotearg_colon (file[i]));
1202       if (STREQ (file[i], "-"))
1203         {
1204           ok &= wipefd (STDOUT_FILENO, qname, randint_source, &flags);
1205         }
1206       else
1207         {
1208           /* Plain filename - Note that this overwrites *argv! */
1209           ok &= wipefile (file[i], qname, randint_source, &flags);
1210         }
1211       free (qname);
1212     }
1213
1214   exit (ok ? EXIT_SUCCESS : EXIT_FAILURE);
1215 }
1216 /*
1217  * vim:sw=2:sts=2:
1218  */