1 /* du -- summarize disk usage
2 Copyright (C) 1988-1991, 1995-2004 Free Software Foundation, Inc.
4 This program is free software; you can redistribute it and/or modify
5 it under the terms of the GNU General Public License as published by
6 the Free Software Foundation; either version 2, or (at your option)
9 This program is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 GNU General Public License for more details.
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software Foundation,
16 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
18 /* Differences from the Unix du:
19 * Doesn't simply ignore the names of regular files given as arguments
22 By tege@sics.se, Torbjorn Granlund,
23 and djm@ai.mit.edu, David MacKenzie.
24 Variable blocks added by lm@sgi.com and eggert@twinsun.com.
25 Rewritten to use nftw, then to use fts by Jim Meyering. */
30 #include <sys/types.h>
34 #include "dirname.h" /* for strip_trailing_slashes */
41 #include "readtokens0.h"
48 /* The official name of this program (e.g., no `g' prefix). */
49 #define PROGRAM_NAME "du"
52 "Torbjorn Granlund", "David MacKenzie, Paul Eggert", "Jim Meyering"
55 # define FTS_CROSS_CHECK(Fts) fts_cross_check (Fts)
56 # define DEBUG_OPT "d"
58 # define FTS_CROSS_CHECK(Fts)
62 /* Initial size of the hash table. */
63 #define INITIAL_TABLE_SIZE 103
65 /* Hash structure for inode and device numbers. The separate entry
66 structure makes it easier to rehash "in place". */
74 /* A set of dev/ino pairs. */
75 static Hash_table *htab;
77 /* Name under which this program was invoked. */
80 /* If nonzero, display counts for all files, not just directories. */
81 static int opt_all = 0;
83 /* If nonzero, rather than using the disk usage of each file,
84 use the apparent size (a la stat.st_size). */
85 static int apparent_size = 0;
87 /* If nonzero, count each hard link of files with multiple links. */
88 static int opt_count_all = 0;
90 /* If true, output the NUL byte instead of a newline at the end of each line. */
91 bool opt_nul_terminate_output = false;
93 /* If nonzero, print a grand total at the end. */
94 static int print_grand_total = 0;
96 /* If nonzero, do not add sizes of subdirectories. */
97 static int opt_separate_dirs = 0;
99 /* Show the total for each directory (and file if --all) that is at
100 most MAX_DEPTH levels down from the root of the hierarchy. The root
101 is at level 0, so `du --max-depth=0' is equivalent to `du -s'. */
102 static int max_depth = INT_MAX;
104 /* Human-readable options for output. */
105 static int human_output_opts;
107 /* The units to use when printing sizes. */
108 static uintmax_t output_block_size;
110 /* File name patterns to exclude. */
111 static struct exclude *exclude;
113 /* Grand total size of all args, in bytes. */
114 static uintmax_t tot_size = 0;
116 /* Nonzero indicates that du should exit with EXIT_FAILURE upon completion. */
119 #define IS_DIR_TYPE(Type) \
121 || (Type) == FTS_DNR)
123 /* For long options that have no equivalent short option, use a
124 non-character as a pseudo short option, starting with CHAR_MAX + 1. */
127 APPARENT_SIZE_OPTION = CHAR_MAX + 1,
134 static struct option const long_options[] =
136 {"all", no_argument, NULL, 'a'},
137 {"apparent-size", no_argument, NULL, APPARENT_SIZE_OPTION},
138 {"block-size", required_argument, 0, 'B'},
139 {"bytes", no_argument, NULL, 'b'},
140 {"count-links", no_argument, NULL, 'l'},
141 {"dereference", no_argument, NULL, 'L'},
142 {"dereference-args", no_argument, NULL, 'D'},
143 {"exclude", required_argument, 0, EXCLUDE_OPTION},
144 {"exclude-from", required_argument, 0, 'X'},
145 {"files0-from", required_argument, 0, FILES0_FROM_OPTION},
146 {"human-readable", no_argument, NULL, 'h'},
147 {"si", no_argument, 0, HUMAN_SI_OPTION},
148 {"kilobytes", no_argument, NULL, 'k'}, /* long form is obsolescent */
149 {"max-depth", required_argument, NULL, MAX_DEPTH_OPTION},
150 {"null", no_argument, NULL, '0'},
151 {"megabytes", no_argument, NULL, 'm'}, /* obsolescent */
152 {"no-dereference", no_argument, NULL, 'P'},
153 {"one-file-system", no_argument, NULL, 'x'},
154 {"separate-dirs", no_argument, NULL, 'S'},
155 {"summarize", no_argument, NULL, 's'},
156 {"total", no_argument, NULL, 'c'},
157 {GETOPT_HELP_OPTION_DECL},
158 {GETOPT_VERSION_OPTION_DECL},
165 if (status != EXIT_SUCCESS)
166 fprintf (stderr, _("Try `%s --help' for more information.\n"),
171 Usage: %s [OPTION]... [FILE]...\n\
172 or: %s [OPTION]... --files0-from=F\n\
173 "), program_name, program_name);
175 Summarize disk usage of each FILE, recursively for directories.\n\
179 Mandatory arguments to long options are mandatory for short options too.\n\
182 -a, --all write counts for all files, not just directories\n\
183 --apparent-size print apparent sizes, rather than disk usage; although\n\
184 the apparent size is usually smaller, it may be\n\
185 larger due to holes in (`sparse') files, internal\n\
186 fragmentation, indirect blocks, and the like\n\
187 -B, --block-size=SIZE use SIZE-byte blocks\n\
188 -b, --bytes equivalent to `--apparent-size --block-size=1'\n\
189 -c, --total produce a grand total\n\
190 -D, --dereference-args dereference FILEs that are symbolic links\n\
193 --files0-from=F summarize disk usage of the NUL-terminated file\n\
194 names specified in file F\n\
195 -H like --si, but also evokes a warning; will soon\n\
196 change to be equivalent to --dereference-args (-D)\n\
197 -h, --human-readable print sizes in human readable format (e.g., 1K 234M 2G)\n\
198 --si like -h, but use powers of 1000 not 1024\n\
199 -k like --block-size=1K\n\
200 -l, --count-links count sizes many times if hard linked\n\
203 -L, --dereference dereference all symbolic links\n\
204 -P, --no-dereference don't follow any symbolic links (this is the default)\n\
205 -0, --null end each output line with 0 byte rather than newline\n\
206 -S, --separate-dirs do not include size of subdirectories\n\
207 -s, --summarize display only a total for each argument\n\
210 -x, --one-file-system skip directories on different filesystems\n\
211 -X FILE, --exclude-from=FILE Exclude files that match any pattern in FILE.\n\
212 --exclude=PATTERN Exclude files that match PATTERN.\n\
213 --max-depth=N print the total for a directory (or file, with --all)\n\
214 only if it is N or fewer levels below the command\n\
215 line argument; --max-depth=0 is the same as\n\
218 fputs (HELP_OPTION_DESCRIPTION, stdout);
219 fputs (VERSION_OPTION_DESCRIPTION, stdout);
221 SIZE may be (or may be an integer optionally followed by) one of following:\n\
222 kB 1000, K 1024, MB 1000*1000, M 1024*1024, and so on for G, T, P, E, Z, Y.\n\
224 printf (_("\nReport bugs to <%s>.\n"), PACKAGE_BUGREPORT);
230 entry_hash (void const *x, size_t table_size)
232 struct entry const *p = x;
234 /* Ignoring the device number here should be fine. */
235 /* The cast to uintmax_t prevents negative remainders
236 if st_ino is negative. */
237 return (uintmax_t) p->st_ino % table_size;
240 /* Compare two dev/ino pairs. Return true if they are the same. */
242 entry_compare (void const *x, void const *y)
244 struct entry const *a = x;
245 struct entry const *b = y;
246 return SAME_INODE (*a, *b) ? true : false;
249 /* Try to insert the INO/DEV pair into the global table, HTAB.
250 If the pair is successfully inserted, return zero.
251 Upon failed memory allocation, exit with nonzero status.
252 If the pair is already in the table, return nonzero. */
254 hash_ins (ino_t ino, dev_t dev)
257 struct entry *ent_from_table;
259 ent = xmalloc (sizeof *ent);
263 ent_from_table = hash_insert (htab, ent);
264 if (ent_from_table == NULL)
266 /* Insertion failed due to lack of memory. */
270 if (ent_from_table == ent)
272 /* Insertion succeeded. */
276 /* That pair is already in the table, so ENT was not inserted. Free it. */
282 /* Initialize the hash table. */
286 htab = hash_initialize (INITIAL_TABLE_SIZE, NULL,
287 entry_hash, entry_compare, free);
292 /* Print N_BYTES. Convert it to a readable value before printing. */
295 print_only_size (uintmax_t n_bytes)
297 char buf[LONGEST_HUMAN_READABLE + 1];
298 fputs (human_readable (n_bytes, buf, human_output_opts,
299 1, output_block_size), stdout);
302 /* Print N_BYTES followed by STRING on a line.
303 Convert N_BYTES to a readable value before printing. */
306 print_size (uintmax_t n_bytes, const char *string)
308 print_only_size (n_bytes);
309 printf ("\t%s%c", string, opt_nul_terminate_output ? '\0' : '\n');
313 /* This function is called once for every file system object that fts
314 encounters. fts does a depth-first traversal. This function knows
315 that and accumulates per-directory totals based on changes in
316 the depth of the current entry. */
319 process_file (FTS *fts, FTSENT *ent)
322 uintmax_t size_to_print;
323 static int first_call = 1;
324 static size_t prev_level;
325 static size_t n_alloc;
326 /* The sum of the sizes (apparent size or disk usage) of all entries in
327 the single directory at the corresponding level. Although this does
328 include the size of each subdirectory entry itself (unless -S is given),
329 it does not include the size of any file in a subdirectory. */
330 static uintmax_t *sum_ent;
332 /* The sum of the sizes of all entries in the hierarchy at or below the
333 directory at the specified level. */
334 static uintmax_t *sum_subdir;
337 const char *file = ent->fts_path;
338 const struct stat *sb = ent->fts_statp;
341 /* If necessary, set FTS_SKIP before returning. */
342 skip = excluded_filename (exclude, ent->fts_name);
344 fts_set (fts, ent, FTS_SKIP);
346 switch (ent->fts_info)
349 error (0, ent->fts_errno, _("cannot access %s"), quote (file));
354 /* if (S_ISDIR (ent->fts_statp->st_mode) && FIXME */
355 error (0, ent->fts_errno, _("%s"), quote (file));
360 /* Don't return just yet, since although the directory is not readable,
361 we were able to stat it, so we do have a size. */
362 error (0, ent->fts_errno, _("cannot read directory %s"), quote (file));
370 /* If this is the first (pre-order) encounter with a directory,
371 or if it's the second encounter for a skipped directory, then
372 return right away. */
373 if (ent->fts_info == FTS_D || skip)
376 /* If the file is being excluded or if it has already been counted
377 via a hard link, then don't let it contribute to the sums. */
381 && hash_ins (sb->st_ino, sb->st_dev)))
383 /* Note that we must not simply return here.
384 We still have to update prev_level and maybe propagate
385 some sums up the hierarchy. */
391 size = (apparent_size
393 : ST_NBLOCKS (*sb) * ST_NBLOCKSIZE);
398 n_alloc = ent->fts_level + 10;
399 sum_ent = XCALLOC (uintmax_t, n_alloc);
400 sum_subdir = XCALLOC (uintmax_t, n_alloc);
404 /* FIXME: it's a shame that we need these `size_t' casts to avoid
405 warnings from gcc about `comparison between signed and unsigned'.
406 Probably unavoidable, assuming that the struct members
407 are of type `int' (historical), since I want variables like
408 n_alloc and prev_level to have types that make sense. */
409 if (n_alloc <= (size_t) ent->fts_level)
411 n_alloc = ent->fts_level * 2;
412 sum_ent = XREALLOC (sum_ent, uintmax_t, n_alloc);
413 sum_subdir = XREALLOC (sum_subdir, uintmax_t, n_alloc);
417 size_to_print = size;
421 if ((size_t) ent->fts_level == prev_level)
423 /* This is usually the most common case. Do nothing. */
425 else if (ent->fts_level > prev_level)
427 /* Descending the hierarchy.
428 Clear the accumulators for *all* levels between prev_level
429 and the current one. The depth may change dramatically,
430 e.g., from 1 to 10. */
432 for (i = prev_level + 1; i <= ent->fts_level; i++)
438 else /* ent->fts_level < prev_level */
440 /* Ascending the hierarchy.
441 Process a directory only after all entries in that
442 directory have been processed. When the depth decreases,
443 propagate sums from the children (prev_level) to the parent.
444 Here, the current level is always one smaller than the
446 assert ((size_t) ent->fts_level == prev_level - 1);
447 size_to_print += sum_ent[prev_level];
448 if (!opt_separate_dirs)
449 size_to_print += sum_subdir[prev_level];
450 sum_subdir[ent->fts_level] += (sum_ent[prev_level]
451 + sum_subdir[prev_level]);
455 prev_level = ent->fts_level;
458 /* Let the size of a directory entry contribute to the total for the
459 containing directory, unless --separate-dirs (-S) is specified. */
460 if ( ! (opt_separate_dirs && IS_DIR_TYPE (ent->fts_info)))
461 sum_ent[ent->fts_level] += size;
463 /* Even if this directory is unreadable or we can't chdir into it,
464 do let its size contribute to the total, ... */
467 /* ... but don't print out a total for it, since without the size(s)
468 of any potential entries, it could be very misleading. */
469 if (ent->fts_info == FTS_DNR)
472 /* If we're not counting an entry, e.g., because it's a hard link
473 to a file we've already counted (and --count-links was not given),
474 then don't print a line for it. */
478 if ((IS_DIR_TYPE (ent->fts_info) && ent->fts_level <= max_depth)
479 || ((opt_all && ent->fts_level <= max_depth) || ent->fts_level == 0))
481 print_only_size (size_to_print);
482 fputc ('\t', stdout);
483 fputs (file, stdout);
484 fputc (opt_nul_terminate_output ? '\0' : '\n', stdout);
489 /* Recursively print the sizes of the directories (and, if selected, files)
490 named in FILES, the last entry of which is NULL.
491 BIT_FLAGS controls how fts works.
492 If the fts_open call fails, exit nonzero.
493 Otherwise, return nonzero upon error. */
496 du_files (char **files, int bit_flags)
502 FTS *fts = xfts_open (files, bit_flags, NULL);
508 ent = fts_read (fts);
513 /* FIXME: try to give a better message */
514 error (0, errno, _("fts_read failed"));
519 FTS_CROSS_CHECK (fts);
521 /* This is a space optimization. If we aren't printing totals,
522 then it's ok to clear the duplicate-detection tables after
523 each command line hierarchy has been processed. */
524 if (ent->fts_level == 0 && ent->fts_info == FTS_D && !print_grand_total)
527 process_file (fts, ent);
530 /* Ignore failure: the only way this can fail is by being unable to
531 return to the original directory, and since we're about to exit,
532 that doesn't matter. */
536 if (print_grand_total)
537 print_size (tot_size, _("total"));
543 main (int argc, char **argv)
547 int max_depth_specified = 0;
550 char *files_from = NULL;
553 /* Bit flags that control how fts works. */
554 int bit_flags = FTS_PHYSICAL | FTS_TIGHT_CYCLE_CHECK;
556 /* If nonzero, display only a total for each argument. */
557 int opt_summarize_only = 0;
562 initialize_main (&argc, &argv);
563 program_name = argv[0];
564 setlocale (LC_ALL, "");
565 bindtextdomain (PACKAGE, LOCALEDIR);
566 textdomain (PACKAGE);
568 atexit (close_stdout);
570 exclude = new_exclude ();
572 human_output_opts = human_options (getenv ("DU_BLOCK_SIZE"), false,
576 while ((c = getopt_long (argc, argv, DEBUG_OPT "0abchHklmsxB:DLPSX:",
577 long_options, NULL)) != -1)
582 case 0: /* Long option. */
592 opt_nul_terminate_output = true;
599 case APPARENT_SIZE_OPTION:
605 human_output_opts = 0;
606 output_block_size = 1;
610 print_grand_total = 1;
614 human_output_opts = human_autoscale | human_SI | human_base_1024;
615 output_block_size = 1;
619 error (0, 0, _("WARNING: use --si, not -H; the meaning of the -H\
620 option will soon\nchange to be the same as that of --dereference-args (-D)"));
622 case HUMAN_SI_OPTION:
623 human_output_opts = human_autoscale | human_SI;
624 output_block_size = 1;
628 human_output_opts = 0;
629 output_block_size = 1024;
632 case MAX_DEPTH_OPTION: /* --max-depth=N */
633 if (xstrtol (optarg, NULL, 0, &tmp_long, NULL) == LONGINT_OK
634 && 0 <= tmp_long && tmp_long <= INT_MAX)
636 max_depth_specified = 1;
637 max_depth = (int) tmp_long;
641 error (0, 0, _("invalid maximum depth %s"),
647 case 'm': /* obsolescent: FIXME: remove in 2005. */
648 human_output_opts = 0;
649 output_block_size = 1024 * 1024;
657 opt_summarize_only = 1;
661 bit_flags |= FTS_XDEV;
665 human_output_opts = human_options (optarg, true, &output_block_size);
668 case 'D': /* This will eventually be 'H' (-H), too. */
669 bit_flags = FTS_COMFOLLOW;
672 case 'L': /* --dereference */
673 bit_flags = FTS_LOGICAL;
676 case 'P': /* --no-dereference */
677 bit_flags = FTS_PHYSICAL;
681 opt_separate_dirs = 1;
685 if (add_exclude_file (add_exclude, exclude, optarg,
686 EXCLUDE_WILDCARDS, '\n'))
688 error (0, errno, "%s", quotearg_colon (optarg));
693 case FILES0_FROM_OPTION:
698 add_exclude (exclude, optarg, EXCLUDE_WILDCARDS);
701 case_GETOPT_HELP_CHAR;
703 case_GETOPT_VERSION_CHAR (PROGRAM_NAME, AUTHORS);
711 usage (EXIT_FAILURE);
713 if (opt_all && opt_summarize_only)
715 error (0, 0, _("cannot both summarize and show all entries"));
716 usage (EXIT_FAILURE);
719 if (opt_summarize_only && max_depth_specified && max_depth == 0)
722 _("warning: summarizing is the same as using --max-depth=0"));
725 if (opt_summarize_only && max_depth_specified && max_depth != 0)
728 _("warning: summarizing conflicts with --max-depth=%d"),
730 usage (EXIT_FAILURE);
733 if (opt_summarize_only)
740 /* When using --files0-from=F, you may not specify any files
741 on the command-line. */
743 error (EXIT_FAILURE, 0,
744 _("%s: you may not specify command-line arguments with\
745 --files0-from"), quotearg_colon (argv[optind]));
747 istream = (STREQ (files_from, "-") ? stdin : fopen (files_from, "r"));
749 error (EXIT_FAILURE, errno, _("cannot open %s for reading"),
752 readtokens0_init (&tok);
754 if (! readtokens0 (istream, &tok) || fclose (istream) != 0)
755 error (EXIT_FAILURE, 0, _("cannot read file names from %s"),
762 files = (optind < argc ? argv + optind : cwd_only);
765 /* Initialize the hash structure for inode numbers. */
768 /* Report and filter out any empty file names before invoking fts.
769 This works around a glitch in fts, which fails immediately
770 (without looking at the other file names) when given an empty
790 /* Using the standard `filename:line-number:' prefix here is
791 not totally appropriate, since NUL is the separator, not NL,
792 but it might be better than nothing. */
793 unsigned long int file_number = j + 1;
794 error (0, 0, "%s:%lu: %s", quotearg_colon (files_from),
795 file_number, _("invalid zero-length file name"));
798 error (0, 0, "%s", _("invalid zero-length file name"));
805 fail |= du_files (files, bit_flags);
807 /* This isn't really necessary, but it does ensure we
808 exercise this function. */
810 readtokens0_free (&tok);
814 exit (fail || G_fail ? EXIT_FAILURE : EXIT_SUCCESS);