1 /* FDUPES Copyright (c) 1999-2018 Adrian Lopez
3 Permission is hereby granted, free of charge, to any person
4 obtaining a copy of this software and associated documentation files
5 (the "Software"), to deal in the Software without restriction,
6 including without limitation the rights to use, copy, modify, merge,
7 publish, distribute, sublicense, and/or sell copies of the Software,
8 and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
39 #ifdef HAVE_NCURSESW_CURSES_H
40 #include <ncursesw/curses.h>
44 #include "ncurses-interface.h"
/* Lower size bound: grokdir drops non-directory entries whose st_size is
   below this value.  Set from the --minsize option. */
52 long long minsize = -1;
/* Upper size bound: grokdir drops non-directory entries whose st_size is
   above this value; -1 disables the check.  Set from the --maxsize option. */
53 long long maxsize = -1;
/* Ordering chosen with --order; main uses it to pick the comparison
   function passed to registerpair and deletesuccessor. */
63 ordertype_t ordertype = ORDER_MTIME;
/* Size in bytes of the buffers used for chunked file reads. */
65 #define CHUNK_SIZE 8192
/* Initial size, and growth step, of the reply buffer in deletefiles. */
67 #define INPUT_SIZE 256
/* max_read value that getcrcpartialsignature passes on. */
69 #define PARTIAL_MD5_SIZE 4096
/* Number of bytes in an MD5 digest. */
71 #define MD5_DIGEST_LENGTH 16
75 TODO: Partial sums (for working with very large files).
/* Holds one 16-byte MD5 digest. */
77 typedef struct _signature
80 md5_byte_t digest[16];
/* Refers to a group of signature_t entries through a pointer. */
83 typedef struct _signatures
86 signature_t *signatures;
/* Node of the binary search tree used to look up files; left and right
   point to the child subtrees (NULL when absent). */
91 typedef struct _filetree {
93 struct _filetree *left;
94 struct _filetree *right;
/* Backslash-escape, in place, every character of *filename_ptr that occurs
   in escape_list.

   escape_list   NUL-terminated set of characters that must be escaped.
   filename_ptr  address of a heap-allocated string; when at least one
                 character is escaped the string is reallocated and
                 *filename_ptr is updated to point at the new block.

   Terminates the program with status 1 when memory cannot be allocated. */
void escapefilename(char *escape_list, char **filename_ptr)
{
  size_t x;
  size_t tx;
  size_t length;
  char *tmp;
  char *resized;
  char *filename;

  filename = *filename_ptr;
  length = strlen(filename);

  /* worst case: every character gains a leading backslash */
  tmp = (char*) malloc(length * 2 + 1);
  if (tmp == NULL) {
    errormsg("out of memory!\n");
    exit(1);
  }

  for (x = 0, tx = 0; x < length; x++) {
    if (strchr(escape_list, filename[x]) != NULL) tmp[tx++] = '\\';
    tmp[tx++] = filename[x];
  }

  tmp[tx] = '\0';

  /* only touch the caller's string when something was actually escaped */
  if (x != tx) {
    /* keep the old pointer until realloc is known to have succeeded */
    resized = realloc(*filename_ptr, tx + 1);
    if (resized == NULL) {
      free(tmp);
      errormsg("out of memory!\n");
      exit(1);
    }
    *filename_ptr = resized;
    strcpy(*filename_ptr, tmp);
  }

  /* the scratch copy is no longer needed on either path */
  free(tmp);
}
/* Return the ID of the device that holds `filename`.

   Symbolic links are followed, because stat() is used.  Returns 0 when the
   file cannot be examined, so a result of 0 is ambiguous. */
dev_t getdevice(char *filename) {
  struct stat s;

  if (stat(filename, &s) != 0) return 0;

  return s.st_dev;
}
/* Return the inode number of `filename`.

   Symbolic links are followed, because stat() is used.  Returns 0 when the
   file cannot be examined. */
ino_t getinode(char *filename) {
  struct stat s;

  if (stat(filename, &s) != 0) return 0;

  return s.st_ino;
}
/* Format `t` as local time in the form "YYYY-MM-DD HH:MM".

   The result lives in a static buffer that is overwritten by the next call,
   so callers must use or copy it before calling again.  An empty string is
   returned when the time cannot be converted or formatted. */
char *fmttime(time_t t) {
  static char buf[64];
  struct tm *when;

  /* localtime() returns NULL for unrepresentable times; passing that to
     strftime() would be undefined behaviour */
  when = localtime(&t);
  if (when == NULL ||
      strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", when) == 0)
    buf[0] = '\0';

  return buf;
}
/* Build a heap-allocated copy of an argument vector: an array of argc
   string pointers, each pointing at a freshly allocated duplicate of the
   matching argv entry.  Allocation failures are reported through errormsg
   with the text "out of memory!". */
153 char **cloneargs(int argc, char **argv)
158 args = (char **) malloc(sizeof(char*) * argc);
160 errormsg("out of memory!\n");
/* duplicate every argument, terminating NUL included */
164 for (x = 0; x < argc; x++) {
165 args[x] = (char*) malloc(strlen(argv[x]) + 1);
166 if (args[x] == NULL) {
168 errormsg("out of memory!\n");
172 strcpy(args[x], argv[x]);
/* Locate `arg` in an argument vector.

   arg    string to look for (compared with strcmp).
   start  index at which the search begins.
   argc   number of entries in argv.
   argv   argument vector to search.

   Returns the index of the first entry at or after `start` that equals
   `arg`.  When there is no such entry the loop counter is returned, which
   is argc for any start <= argc. */
int findarg(char *arg, int start, int argc, char **argv)
{
  int x;

  for (x = start; x < argc; x++)
    if (strcmp(argv[x], arg) == 0)
      return x;

  return x;
}
189 /* Find the first non-option argument after specified option. */
/* option   option text to locate in oldargv.
   oldargv  argument vector searched with findarg.
   newargv  argument vector scanned from index optind onwards.
   Each scanned newargv entry is looked up in oldargv, and the first entry
   found beyond the position of `option` yields its newargv index.
   NOTE(review): presumably oldargv is the pre-getopt copy and newargv the
   vector after getopt reordered it -- confirm against the caller. */
190 int nonoptafter(char *option, int argc, char **oldargv,
191 char **newargv, int optind)
/* position of the option itself; the search skips index 0 */
198 targetind = findarg(option, 1, argc, oldargv);
200 for (x = optind; x < argc; x++) {
201 testind = findarg(newargv[x], startat, argc, oldargv);
202 if (testind > targetind) return x;
/* otherwise resume the next lookup from where this entry was found */
203 else startat = testind;
209 void getfilestats(file_t *file, struct stat *info, struct stat *linfo)
211 file->size = info->st_size;;
212 file->inode = info->st_ino;
213 file->device = info->st_dev;
214 file->ctime = info->st_ctime;
215 file->mtime = info->st_mtime;
/* Scan directory `dir` and add an entry for each accepted file to the front
   of the list at *filelistp.

   dir             directory to read.
   filelistp       address of the head of the file list being built.
   logfile_status  stat data of the log file; entries with the same device
                   and inode are not added.

   Subdirectories are scanned by recursive calls when F_RECURSE is set, and
   the values those calls return are added to filecount. */
218 int grokdir(char *dir, file_t **filelistp, struct stat *logfile_status)
222 struct dirent *dirinfo;
/* static: the spinner position is shared by all calls, recursive ones too */
227 static int progress = 0;
228 static char indicator[] = "-\\|/";
229 char *fullname, *name;
234 errormsg("could not chdir to %s\n", dir);
238 while ((dirinfo = readdir(cd)) != NULL) {
/* skip the "." and ".." entries */
239 if (strcmp(dirinfo->d_name, ".") && strcmp(dirinfo->d_name, "..")) {
240 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
241 fprintf(stderr, "\rBuilding file list %c ", indicator[progress]);
242 progress = (progress + 1) % 4;
245 newfile = (file_t*) malloc(sizeof(file_t));
248 errormsg("out of memory!\n");
251 } else newfile->next = *filelistp;
255 newfile->crcsignature = NULL;
256 newfile->crcpartial = NULL;
257 newfile->duplicates = NULL;
258 newfile->hasdupes = 0;
/* room for dir, an optional '/' separator, the entry name and the NUL */
260 newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);
262 if (!newfile->d_name) {
263 errormsg("out of memory!\n");
269 strcpy(newfile->d_name, dir);
270 lastchar = strlen(dir) - 1;
/* add a separator unless dir already ends in one */
271 if (lastchar >= 0 && dir[lastchar] != '/')
272 strcat(newfile->d_name, "/");
273 strcat(newfile->d_name, dirinfo->d_name);
275 if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
/* basename() may modify its argument, so it works on a copy */
276 fullname = strdup(newfile->d_name);
279 errormsg("out of memory!\n");
284 name = basename(fullname);
/* names starting with '.' are treated as hidden */
285 if (name[0] == '.' && strcmp(name, ".") && strcmp(name, "..") ) {
286 free(newfile->d_name);
293 if (stat(newfile->d_name, &info) == -1) {
294 free(newfile->d_name);
/* size filters apply to everything except directories */
299 if (!S_ISDIR(info.st_mode) && (((info.st_size == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) || info.st_size < minsize || (info.st_size > maxsize && maxsize != -1)))) {
300 free(newfile->d_name);
/* never list the log file itself */
305 if (info.st_dev == logfile_status->st_dev && info.st_ino == logfile_status->st_ino)
307 free(newfile->d_name);
/* lstat tells whether the entry itself is a symbolic link */
312 if (lstat(newfile->d_name, &linfo) == -1) {
313 free(newfile->d_name);
318 if (S_ISDIR(info.st_mode)) {
/* descend only when recursing, and through links only with F_FOLLOWLINKS */
319 if (ISFLAG(flags, F_RECURSE) && (ISFLAG(flags, F_FOLLOWLINKS) || !S_ISLNK(linfo.st_mode)))
320 filecount += grokdir(newfile->d_name, filelistp, logfile_status);
321 free(newfile->d_name);
/* keep regular files, plus symbolic links when links are followed */
324 if (S_ISREG(linfo.st_mode) || (S_ISLNK(linfo.st_mode) && ISFLAG(flags, F_FOLLOWLINKS))) {
325 getfilestats(newfile, &info, &linfo);
326 *filelistp = newfile;
329 free(newfile->d_name);
/* Compute an MD5 digest over the contents of `filename`, reading the file
   in pieces of at most CHUNK_SIZE bytes.

   filename  path of the file to hash.
   fsize     size of the file in bytes.
   max_read  when non-zero and smaller than fsize, limits the hashed length.

   The digest is produced in a static buffer, so it is overwritten by the
   next call.  Open and read failures are reported through errormsg. */
341 md5_byte_t *getcrcsignatureuntil(char *filename, off_t fsize, off_t max_read)
345 static md5_byte_t digest[MD5_DIGEST_LENGTH];
346 static md5_byte_t chunk[CHUNK_SIZE];
/* NOTE(review): presumably fsize is clamped to max_read here -- confirm */
351 if (max_read != 0 && fsize > max_read)
354 file = fopen(filename, "rb");
356 errormsg("error opening file %s\n", filename);
/* read a whole chunk, or whatever remains when less than a chunk is left */
361 toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize;
/* one item of `toread` bytes: a short read counts as a failure */
362 if (fread(chunk, toread, 1, file) != 1) {
363 errormsg("error reading from file %s\n", filename);
367 md5_append(&state, chunk, toread);
371 md5_finish(&state, digest);
/* Digest of `filename` with no read limit: forwards to
   getcrcsignatureuntil with max_read set to 0. */
378 md5_byte_t *getcrcsignature(char *filename, off_t fsize)
380 return getcrcsignatureuntil(filename, fsize, 0);
/* Partial digest of `filename`: forwards to getcrcsignatureuntil with
   max_read set to PARTIAL_MD5_SIZE. */
383 md5_byte_t *getcrcpartialsignature(char *filename, off_t fsize)
385 return getcrcsignatureuntil(filename, fsize, PARTIAL_MD5_SIZE);
/* Compare two MD5 digests one byte at a time over MD5_DIGEST_LENGTH bytes.
   NOTE(review): presumably returns a negative, zero or positive value like
   memcmp, decided by the first differing byte -- confirm. */
388 int md5cmp(const md5_byte_t *a, const md5_byte_t *b)
392 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
396 else if (a[x] > b[x])
/* Copy an MD5 digest from `from` to `to`, iterating over
   MD5_DIGEST_LENGTH bytes. */
403 void md5copy(md5_byte_t *to, const md5_byte_t *from)
407 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
/* Recursively process the left and then the right subtree of checktree.
   checktree itself must not be NULL, since it is dereferenced unchecked.
   NOTE(review): presumably the node is freed after its children -- confirm. */
411 void purgetree(filetree_t *checktree)
413 if (checktree->left != NULL) purgetree(checktree->left);
415 if (checktree->right != NULL) purgetree(checktree->right);
/* Allocate a new tree node, store it in *branch, and make it a leaf that
   refers to `file`.  An allocation failure is reported through errormsg. */
420 int registerfile(filetree_t **branch, file_t *file)
422 *branch = (filetree_t*) malloc(sizeof(filetree_t));
423 if (*branch == NULL) {
424 errormsg("out of memory!\n");
/* the node refers to the caller's file_t; no copy is made */
428 (*branch)->file = file;
429 (*branch)->left = NULL;
430 (*branch)->right = NULL;
/* Tell whether two files have the same mode bits, owner and group.

   Returns 1 when st_mode, st_uid and st_gid are all equal, 0 when any of
   them differs, and -1 when either file cannot be examined with stat().
   Note that -1 is non-zero, so callers that only test the result with `!`
   treat a stat() failure the same way as matching permissions. */
int same_permissions(char* name1, char* name2)
{
  struct stat s1, s2;

  if (stat(name1, &s1) != 0) return -1;
  if (stat(name2, &s2) != 0) return -1;

  return (s1.st_mode == s2.st_mode &&
          s1.st_uid == s2.st_uid &&
          s1.st_gid == s2.st_gid);
}
/* Check whether `file` has the same inode and device as the file stored in
   checktree, or as any entry on that file's duplicates chain. */
447 int is_hardlink(filetree_t *checktree, file_t *file)
/* first compare against the file held by the node itself */
451 if ((file->inode == checktree->file->inode) &&
452 (file->device == checktree->file->device))
/* then against every file already recorded as its duplicate */
455 if (checktree->file->hasdupes)
457 dupe = checktree->file->duplicates;
460 if ((file->inode == dupe->inode) &&
461 (file->device == dupe->device))
464 dupe = dupe->duplicates;
465 } while (dupe != NULL);
471 /* check whether two paths represent the same file (deleting one would delete the other) */
/* The test compares, in order: device and inode of the two files, their
   base names, and the device and inode of their containing directories. */
472 int is_same_file(file_t *file_a, file_t *file_b)
480 struct stat dirstat_a;
481 struct stat dirstat_b;
483 /* if files on different devices and/or different inodes, they are not the same file */
484 if (file_a->device != file_b->device || file_a->inode != file_b->inode)
487 /* copy filenames (basename and dirname may modify these) */
488 filename_a = strdup(file_a->d_name);
492 filename_b = strdup(file_b->d_name);
496 /* get file basenames */
497 basename_a = basename(filename_a);
/* memmove, not strcpy: the base name may point into the same buffer */
498 memmove(filename_a, basename_a, strlen(basename_a) + 1);
500 basename_b = basename(filename_b);
501 memmove(filename_b, basename_b, strlen(basename_b) + 1);
503 /* if files have different names, they are not the same file */
504 if (strcmp(filename_a, filename_b) != 0)
/* restore the full paths; the buffers were sized for them by strdup */
512 strcpy(filename_a, file_a->d_name);
513 strcpy(filename_b, file_b->d_name);
515 /* get directory names */
516 dirname_a = dirname(filename_a);
517 if (stat(dirname_a, &dirstat_a) != 0)
524 dirname_b = dirname(filename_b);
525 if (stat(dirname_b, &dirstat_b) != 0)
535 /* if directories on which files reside are different, they are not the same file */
536 if (dirstat_a.st_dev != dirstat_b.st_dev || dirstat_a.st_ino != dirstat_b.st_ino)
539 /* same device, inode, filename, and directory; therefore, same file */
543 /* check whether given tree node already contains a copy of given file */
/* `file` is compared with is_same_file against the node's own file and
   then against each entry on that file's duplicates chain. */
544 int has_same_file(filetree_t *checktree, file_t *file)
548 if (is_same_file(checktree->file, file))
551 if (checktree->file->hasdupes)
553 dupe = checktree->file->duplicates;
556 if (is_same_file(dupe, file))
559 dupe = dupe->duplicates;
560 } while (dupe != NULL);
/* Search the tree at checktree for a file whose contents appear to equal
   those of `file`.

   Files are ordered by size, then by partial MD5 signature, then by full
   MD5 signature.  Signatures are computed on demand and cached in the
   file_t records (crcpartial / crcsignature).  The search recurses into the
   left or right subtree according to the comparison result; when the
   chosen subtree is empty, `file` is registered there as a new node.  When
   every comparison is equal, the address of the matching node's file
   pointer is returned. */
566 file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
569 md5_byte_t *crcsignature;
571 if (ISFLAG(flags, F_CONSIDERHARDLINKS))
573 /* If node already contains file, we don't want to add it again.
575 if (has_same_file(checktree, file))
580 /* If device and inode fields are equal one of the files is a
581 hard link to the other or the files have been listed twice
582 unintentionally. We don't want to flag these files as
583 duplicates unless the user specifies otherwise.
585 if (is_hardlink(checktree, file))
/* cheapest test first: files of different size cannot be duplicates */
589 if (file->size < checktree->file->size)
592 if (file->size > checktree->file->size) cmpresult = 1;
594 if (ISFLAG(flags, F_PERMISSIONS) &&
595 !same_permissions(file->d_name, checktree->file->d_name))
/* partial signature of the tree's file, computed once and cached */
598 if (checktree->file->crcpartial == NULL) {
599 crcsignature = getcrcpartialsignature(checktree->file->d_name, checktree->file->size);
600 if (crcsignature == NULL) {
601 errormsg ("cannot read file %s\n", checktree->file->d_name);
/* copy the digest into storage owned by the file record */
605 checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
606 if (checktree->file->crcpartial == NULL) {
607 errormsg("out of memory\n");
610 md5copy(checktree->file->crcpartial, crcsignature);
/* same for the file being inserted */
613 if (file->crcpartial == NULL) {
614 crcsignature = getcrcpartialsignature(file->d_name, file->size);
615 if (crcsignature == NULL) {
616 errormsg ("cannot read file %s\n", file->d_name);
620 file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
621 if (file->crcpartial == NULL) {
622 errormsg("out of memory\n");
625 md5copy(file->crcpartial, crcsignature);
628 cmpresult = md5cmp(file->crcpartial, checktree->file->crcpartial);
629 /*if (cmpresult != 0) errormsg(" on %s vs %s\n", file->d_name, checktree->file->d_name);*/
/* partial signatures agree: fall back to the full-file signatures */
631 if (cmpresult == 0) {
632 if (checktree->file->crcsignature == NULL) {
633 crcsignature = getcrcsignature(checktree->file->d_name, checktree->file->size);
634 if (crcsignature == NULL) return NULL;
636 checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
637 if (checktree->file->crcsignature == NULL) {
638 errormsg("out of memory\n");
641 md5copy(checktree->file->crcsignature, crcsignature);
644 if (file->crcsignature == NULL) {
645 crcsignature = getcrcsignature(file->d_name, file->size);
646 if (crcsignature == NULL) return NULL;
648 file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
649 if (file->crcsignature == NULL) {
650 errormsg("out of memory\n");
653 md5copy(file->crcsignature, crcsignature);
656 cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature);
657 /*if (cmpresult != 0) errormsg("P on %s vs %s\n",
658 file->d_name, checktree->file->d_name);
659 else errormsg("P F on %s vs %s\n", file->d_name,
660 checktree->file->d_name);
661 printf("%s matches %s\n", file->d_name, checktree->file->d_name);*/
/* descend left, or insert there when the left subtree is empty */
666 if (checktree->left != NULL) {
667 return checkmatch(root, checktree->left, file);
669 registerfile(&(checktree->left), file);
/* larger: descend right, or insert there when the right subtree is empty */
672 } else if (cmpresult > 0) {
673 if (checktree->right != NULL) {
674 return checkmatch(root, checktree->right, file);
676 registerfile(&(checktree->right), file);
/* equal on every test: hand back the slot holding the matching file */
681 return &checktree->file;
685 /* Do a bit-for-bit comparison in case two different files produce the
686 same signature. Unlikely, but better safe than sorry. */
688 int confirmmatch(FILE *file1, FILE *file2)
690 unsigned char c1[CHUNK_SIZE];
691 unsigned char c2[CHUNK_SIZE];
695 fseek(file1, 0, SEEK_SET);
696 fseek(file2, 0, SEEK_SET);
699 r1 = fread(c1, sizeof(unsigned char), sizeof(c1), file1);
700 r2 = fread(c2, sizeof(unsigned char), sizeof(c2), file2);
702 if (r1 != r2) return 0; /* file lengths are different */
703 if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
/* Print a one-line summary of the duplicates recorded in the `files` list:
   the number of duplicate files, the number of sets, and the space the
   duplicates occupy.  Prints "No duplicates found." when there are none. */
709 void summarizematches(file_t *files)
712 double numbytes = 0.0;
716 while (files != NULL)
/* walk the duplicates chain of this set */
722 tmpfile = files->duplicates;
723 while (tmpfile != NULL)
/* every duplicate has the size of the head of its set */
726 numbytes += files->size;
727 tmpfile = tmpfile->duplicates;
735 printf("No duplicates found.\n\n");
/* NOTE(review): the byte threshold is 1024 while the kilobyte and megabyte
   figures divide by 1000 -- the units are mixed; confirm which is intended */
738 if (numbytes < 1024.0)
739 printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", numfiles, numsets, numbytes);
740 else if (numbytes <= (1000.0 * 1000.0))
741 printf("%d duplicate files (in %d sets), occupying %.1f kilobytes\n\n", numfiles, numsets, numbytes / 1000.0);
743 printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", numfiles, numsets, numbytes / (1000.0 * 1000.0));
/* Print every set of duplicates in the `files` list to stdout.

   Output is shaped by the global flags: F_OMITFIRST leaves out the first
   file of each set, F_SHOWSIZE prints a size header, F_SHOWTIME prefixes
   each name with its modification time, and F_DSAMELINE separates names
   with spaces instead of newlines.  With F_DSAMELINE the stored d_name
   strings are escaped in place by escapefilename. */
748 void printmatches(file_t *files)
752 while (files != NULL) {
753 if (files->hasdupes) {
754 if (!ISFLAG(flags, F_OMITFIRST)) {
755 if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", (long long int)files->size,
756 (files->size != 1) ? "s " : " ");
757 if (ISFLAG(flags, F_SHOWTIME))
758 printf("%s ", fmttime(files->mtime));
/* escape backslashes and spaces so names stay distinguishable on one line */
759 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
760 printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
/* then the remaining members of the set */
762 tmpfile = files->duplicates;
763 while (tmpfile != NULL) {
764 if (ISFLAG(flags, F_SHOWTIME))
765 printf("%s ", fmttime(tmpfile->mtime));
766 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
767 printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
768 tmpfile = tmpfile->duplicates;
#define REVISE_APPEND "_tmp"

/* Build an alternative name for `path` by inserting REVISE_APPEND followed
   by `seq` in front of the last '.' of the path, or at the end when the
   path contains no '.'.  For example "photo.jpg" with seq 3 becomes
   "photo_tmp3.jpg", and "README" with seq 12 becomes "README_tmp12".

   Returns a newly allocated string that the caller must free(), or NULL
   when memory cannot be allocated. */
char *revisefilename(char *path, int seq)
{
  int digits;
  size_t size;
  char *newpath;
  char *dot;

  /* number of characters needed to print seq, sign included */
  digits = snprintf(NULL, 0, "%d", seq);
  if (digits < 0) return NULL;

  /* the '.' in the output replaces the one in path, so no extra byte */
  size = strlen(path) + strlen(REVISE_APPEND) + (size_t) digits + 1;
  newpath = malloc(size);
  if (!newpath) return newpath;

  dot = strrchr(path, '.');
  if (dot)
  {
    /* "%.*s" prints only the part of path that precedes the last dot */
    snprintf(newpath, size, "%.*s%s%d.%s", (int) (dot - path), path,
             REVISE_APPEND, seq, dot + 1);
  }
  else
  {
    snprintf(newpath, size, "%s%s%d", path, REVISE_APPEND, seq);
  }

  return newpath;
}
/* Create `newfile` as a hard link to `oldfile` and verify the result by
   comparing the device and inode of both names. */
812 int relink(char *oldfile, char *newfile)
/* remember the identity of the source before linking */
819 od = getdevice(oldfile);
820 oi = getinode(oldfile);
822 if (link(oldfile, newfile) != 0)
825 /* make sure we're working with the right file (the one we created) */
826 nd = getdevice(newfile);
827 ni = getinode(newfile);
829 if (nd != od || oi != ni)
830 return 0; /* file is not what we expected */
/* Delete duplicate files set by set, using the line-based interface.

   files    list of files; entries with hasdupes set head a set.
   prompt   zero: keep only the first file of every set without asking;
            non-zero: ask the user which files of each set to preserve.
   tty      stream the user's replies are read from.
   logfile  name passed to log_open; deletions and kept files are logged.

   Files in a set are numbered from 1 in dupelist and preserve. */
835 void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)
852 struct log_info *loginfo;
/* first pass: find the size of the largest set to size the work arrays */
858 if (curfile->hasdupes) {
862 tmpfile = curfile->duplicates;
865 tmpfile = tmpfile->duplicates;
868 if (counter > max) max = counter;
871 curfile = curfile->next;
876 dupelist = (file_t**) malloc(sizeof(file_t*) * max);
877 preserve = (int*) malloc(sizeof(int) * max);
878 preservestr = (char*) malloc(INPUT_SIZE);
880 if (!dupelist || !preserve || !preservestr) {
881 errormsg("out of memory\n");
887 loginfo = log_open(logfile, &log_error);
889 register_sigint_handler();
/* second pass: list each set, decide what to keep, then delete the rest */
892 if (files->hasdupes) {
895 dupelist[counter] = files;
899 if (ISFLAG(flags, F_SHOWTIME))
900 printf("[%d] [%s] %s\n", counter, fmttime(files->mtime), files->d_name);
902 printf("[%d] %s\n", counter, files->d_name);
905 tmpfile = files->duplicates;
908 dupelist[++counter] = tmpfile;
911 if (ISFLAG(flags, F_SHOWTIME))
912 printf("[%d] [%s] %s\n", counter, fmttime(tmpfile->mtime), tmpfile->d_name);
914 printf("[%d] %s\n", counter, tmpfile->d_name);
916 tmpfile = tmpfile->duplicates;
919 if (prompt) printf("\n");
921 if (!prompt) /* preserve only the first file */
924 for (x = 2; x <= counter; x++) preserve[x] = 0;
927 else /* prompt for files to preserve */
930 printf("Set %d of %d, preserve files [1 - %d, all, quit]",
931 curgroup, groups, counter);
932 if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", (long long int)files->size,
933 (files->size != 1) ? "s " : " ");
937 if (!fgets(preservestr, INPUT_SIZE, tty))
939 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
940 preservestr[1] = '\0';
/* keep reading until the whole line, newline included, is in the buffer */
957 i = strlen(preservestr) - 1;
959 while (preservestr[i]!='\n'){ /* tail of buffer must be a newline */
/* grow the buffer by INPUT_SIZE; the realloc result is checked via tstr */
961 realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE);
962 if (!tstr) { /* couldn't allocate memory, treat as fatal */
963 errormsg("out of memory!\n");
968 if (!fgets(preservestr + i + 1, INPUT_SIZE, tty))
970 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
971 preservestr[1] = '\0';
974 i = strlen(preservestr)-1;
977 if (strcmp(preservestr, "q\n") == 0 || strcmp(preservestr, "quit\n") == 0)
/* start from "delete everything", then mark what the user listed */
991 for (x = 1; x <= counter; x++) preserve[x] = 0;
993 token = strtok(preservestr, " ,\n");
995 while (token != NULL) {
996 if (strcasecmp(token, "all") == 0 || strcasecmp(token, "a") == 0)
997 for (x = 0; x <= counter; x++) preserve[x] = 1;
1000 sscanf(token, "%d", &number);
/* numbers outside 1..counter are ignored */
1001 if (number > 0 && number <= counter) preserve[number] = 1;
1003 token = strtok(NULL, " ,\n");
1006 for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x];
1007 } while (sum < 1); /* make sure we've preserved at least one file */
1012 log_begin_set(loginfo);
/* report and log the outcome for every file of the set */
1014 for (x = 1; x <= counter; x++) {
1017 printf(" [+] %s\n", dupelist[x]->d_name);
1020 log_file_remaining(loginfo, dupelist[x]->d_name);
1023 if (remove(dupelist[x]->d_name) == 0) {
1024 printf(" [-] %s\n", dupelist[x]->d_name);
1027 log_file_deleted(loginfo, dupelist[x]->d_name);
1029 printf(" [!] %s ", dupelist[x]->d_name);
1030 printf("-- unable to delete file!\n");
/* a file that could not be removed is logged as remaining */
1033 log_file_remaining(loginfo, dupelist[x]->d_name);
1040 log_end_set(loginfo);
1043 files = files->next;
/* Comparison function for ordering by arrival: the result depends only on
   whether f2 already has duplicates, and F_REVERSE swaps the sign. */
1054 int sort_pairs_by_arrival(file_t *f1, file_t *f2)
1056 if (f2->duplicates != 0)
1057 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
1059 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
/* Comparison function ordering files by status-change time (ctime):
   negative when f1 is older, positive when f1 is newer.  F_REVERSE swaps
   the sign. */
1062 int sort_pairs_by_ctime(file_t *f1, file_t *f2)
1064 if (f1->ctime < f2->ctime)
1065 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
1066 else if (f1->ctime > f2->ctime)
1067 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
/* Comparison function ordering files by modification time (mtime):
   negative when f1 is older, positive when f1 is newer, with F_REVERSE
   swapping the sign.  Equal mtimes are decided by sort_pairs_by_ctime. */
1072 int sort_pairs_by_mtime(file_t *f1, file_t *f2)
1074 if (f1->mtime < f2->mtime)
1075 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
1076 else if (f1->mtime > f2->mtime)
1077 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
/* tie: fall back to the status-change time */
1079 return sort_pairs_by_ctime(f1, f2);
/* Comparison function ordering files by path name using strcmp; the result
   is negated when F_REVERSE is set. */
1082 int sort_pairs_by_filename(file_t *f1, file_t *f2)
1084 int strvalue = strcmp(f1->d_name, f2->d_name);
1085 return !ISFLAG(flags, F_REVERSE) ? strvalue : -strvalue;
/* Insert newmatch into the duplicates chain that starts at *matchlist,
   keeping the chain ordered according to comparef.

   matchlist  address of the pointer to the head of the chain; updated when
              newmatch becomes the new head.
   newmatch   file to add to the chain.
   comparef   ordering function; newmatch is placed in front of the first
              entry for which comparef(newmatch, entry) <= 0.

   Only the head of a chain carries the hasdupes flag. */
1088 void registerpair(file_t **matchlist, file_t *newmatch,
1089 int (*comparef)(file_t *f1, file_t *f2))
1094 (*matchlist)->hasdupes = 1;
1097 traverse = *matchlist;
1100 if (comparef(newmatch, traverse) <= 0)
/* link newmatch in front of the current entry */
1102 newmatch->duplicates = traverse;
1106 *matchlist = newmatch; /* update pointer to head of list */
1108 newmatch->hasdupes = 1;
1109 traverse->hasdupes = 0; /* flag is only for first file in dupe chain */
/* not at the head: splice in after the previous entry */
1112 back->duplicates = newmatch;
/* reached the end of the chain: append */
1118 if (traverse->duplicates == 0)
1120 traverse->duplicates = newmatch;
1123 traverse->hasdupes = 1;
1130 traverse = traverse->duplicates;
/* Given two matching files, keep the one that sorts first according to
   comparef and delete the other immediately.

   existing   address of the pointer to the file already known; set to
              `duplicate` when `duplicate` is the file that is kept.
   duplicate  newly found matching file.
   comparef   ordering function used to choose which file to keep.
   loginfo    log handle passed to the log_file_* calls. */
1134 void deletesuccessor(file_t **existing, file_t *duplicate,
1135 int (*comparef)(file_t *f1, file_t *f2), struct log_info *loginfo)
/* duplicate sorts at or after the existing file: the existing file stays */
1140 if (comparef(duplicate, *existing) >= 0)
1142 to_keep = *existing;
1143 to_delete = duplicate;
1147 to_keep = duplicate;
1148 to_delete = *existing;
1150 *existing = duplicate;
/* wipe the progress line before printing results */
1153 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
1155 printf(" [+] %s\n", to_keep->d_name);
1158 log_file_remaining(loginfo, to_keep->d_name);
1160 if (remove(to_delete->d_name) == 0) {
1161 printf(" [-] %s\n", to_delete->d_name);
1164 log_file_deleted(loginfo, to_delete->d_name);
1166 printf(" [!] %s ", to_delete->d_name);
1167 printf("-- unable to delete file!\n");
/* a file that could not be removed is logged as remaining */
1170 log_file_remaining(loginfo, to_delete->d_name);
/* Print the command-line usage summary, one option per entry, to stdout.
   Continuation lines start with a tab so they line up under the
   description column. */
1178 printf("Usage: fdupes [options] DIRECTORY...\n\n");
1180 printf(" -r --recurse \tfor every directory given follow subdirectories\n");
1181 printf(" \tencountered within\n");
1182 printf(" -R --recurse: \tfor each directory given after this option follow\n");
1183 printf(" \tsubdirectories encountered within (note the ':' at\n");
1184 printf(" \tthe end of the option, manpage for more details)\n");
1185 printf(" -s --symlinks \tfollow symlinks\n");
1186 printf(" -H --hardlinks \tnormally, when two or more files point to the same\n");
1187 printf(" \tdisk area they are treated as non-duplicates; this\n");
1188 printf(" \toption will change this behavior\n");
1189 printf(" -G --minsize=SIZE\tconsider only files greater than or equal to SIZE\n");
1190 printf(" -L --maxsize=SIZE\tconsider only files less than or equal to SIZE\n");
1191 printf(" -n --noempty \texclude zero-length files from consideration\n");
1192 printf(" -A --nohidden \texclude hidden files from consideration\n");
1193 printf(" -f --omitfirst \tomit the first file in each set of matches\n");
1194 printf(" -1 --sameline \tlist each set of matches on a single line\n");
1195 printf(" -S --size \tshow size of duplicate files\n");
1196 printf(" -t --time \tshow modification time of duplicate files\n");
1197 printf(" -m --summarize \tsummarize dupe information\n");
1198 printf(" -q --quiet \thide progress indicator\n");
1199 printf(" -d --delete \tprompt user for files to preserve and delete all\n");
1200 printf(" \tothers; important: under particular circumstances,\n");
1201 printf(" \tdata may be lost when using this option together\n");
1202 printf(" \twith -s or --symlinks, or when specifying a\n");
1203 printf(" \tparticular directory more than once; refer to the\n");
1204 printf(" \tfdupes documentation for additional information\n");
1206 printf(" -P --plain \twith --delete, use line-based prompt (as with older\n");
1207 printf(" \tversions of fdupes) instead of screen-mode interface\n");
1209 printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n");
1210 printf(" \teach set of duplicates and delete the rest without\n");
1211 printf(" \tprompting the user\n");
1212 printf(" -I --immediate \tdelete duplicates as they are encountered, without\n");
1213 printf(" \tgrouping into sets; implies --noprompt\n");
1214 printf(" -p --permissions \tdon't consider files with different owner/group or\n");
1215 printf(" \tpermission bits as duplicates\n");
1216 printf(" -o --order=BY \tselect sort order for output and deleting; by file\n");
1217 printf(" \tmodification time (BY='time'; default), status\n");
1218 printf(" \tchange time (BY='ctime'), or filename (BY='name')\n");
1219 printf(" -i --reverse \treverse order while sorting\n");
1220 printf(" -l --log=LOGFILE \tlog file deletion choices to LOGFILE\n");
1221 printf(" -v --version \tdisplay fdupes version\n");
1222 printf(" -h --help \tdisplay this help message\n\n");
/* builds without getopt.h only understand the short option letters */
1223 #ifndef HAVE_GETOPT_H
1224 printf("Note: Long options are not supported in this fdupes build.\n\n");
/* Program entry point.

   Parses the command-line options into the global flags, builds the file
   list by scanning every directory argument with grokdir, looks each file
   up in a search tree with checkmatch, confirms candidate matches byte by
   byte with confirmmatch, and finally deletes, summarizes or prints the
   duplicates according to the selected options. */
1228 int main(int argc, char **argv) {
1233 file_t *files = NULL;
1235 file_t **match = NULL;
1236 filetree_t *checktree = NULL;
1242 struct log_info *loginfo;
1244 struct stat logfile_status;
/* long option names, available only when getopt.h is present */
1247 #ifdef HAVE_GETOPT_H
1248 static struct option long_options[] =
1250 { "omitfirst", 0, 0, 'f' },
1251 { "recurse", 0, 0, 'r' },
1252 { "recurse:", 0, 0, 'R' },
1253 { "quiet", 0, 0, 'q' },
1254 { "sameline", 0, 0, '1' },
1255 { "size", 0, 0, 'S' },
1256 { "time", 0, 0, 't' },
1257 { "symlinks", 0, 0, 's' },
1258 { "hardlinks", 0, 0, 'H' },
1259 { "minsize", 1, 0, 'G' },
1260 { "maxsize", 1, 0, 'L' },
1261 { "noempty", 0, 0, 'n' },
1262 { "nohidden", 0, 0, 'A' },
1263 { "delete", 0, 0, 'd' },
1264 { "plain", 0, 0, 'P' },
1265 { "version", 0, 0, 'v' },
1266 { "help", 0, 0, 'h' },
1267 { "noprompt", 0, 0, 'N' },
1268 { "immediate", 0, 0, 'I'},
1269 { "summarize", 0, 0, 'm'},
1270 { "summary", 0, 0, 'm' },
1271 { "permissions", 0, 0, 'p' },
1272 { "order", 1, 0, 'o' },
1273 { "reverse", 0, 0, 'i' },
1274 { "log", 1, 0, 'l' },
1277 #define GETOPT getopt_long
1279 #define GETOPT getopt
1282 program_name = argv[0];
1284 setlocale(LC_CTYPE, "");
/* keep a copy of the arguments for nonoptafter, taken before option parsing */
1286 oldargv = cloneargs(argc, argv);
1288 while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:"
1289 #ifdef HAVE_GETOPT_H
1290 , long_options, NULL
1295 SETFLAG(flags, F_OMITFIRST);
1298 SETFLAG(flags, F_RECURSE);
1301 SETFLAG(flags, F_RECURSEAFTER);
1304 SETFLAG(flags, F_HIDEPROGRESS);
1307 SETFLAG(flags, F_DSAMELINE);
1310 SETFLAG(flags, F_SHOWSIZE);
1313 SETFLAG(flags, F_SHOWTIME);
1316 SETFLAG(flags, F_FOLLOWLINKS);
1319 SETFLAG(flags, F_CONSIDERHARDLINKS);
/* sizes must be complete, non-negative base-10 integers */
1322 minsize = strtoll(optarg, &endptr, 10);
1323 if (optarg[0] == '\0' || *endptr != '\0' || minsize < 0)
1325 errormsg("invalid value for --minsize: '%s'\n", optarg);
1330 maxsize = strtoll(optarg, &endptr, 10);
1331 if (optarg[0] == '\0' || *endptr != '\0' || maxsize < 0)
1333 errormsg("invalid value for --maxsize: '%s'\n", optarg);
1338 SETFLAG(flags, F_EXCLUDEEMPTY);
1341 SETFLAG(flags, F_EXCLUDEHIDDEN);
1344 SETFLAG(flags, F_DELETEFILES);
1347 SETFLAG(flags, F_PLAINPROMPT);
1350 printf("fdupes %s\n", VERSION);
1356 SETFLAG(flags, F_NOPROMPT);
1359 SETFLAG(flags, F_IMMEDIATE);
1362 SETFLAG(flags, F_SUMMARIZEMATCHES);
1365 SETFLAG(flags, F_PERMISSIONS);
/* the --order value is matched without regard to case */
1368 if (!strcasecmp("name", optarg)) {
1369 ordertype = ORDER_NAME;
1370 } else if (!strcasecmp("time", optarg)) {
1371 ordertype = ORDER_MTIME;
1372 } else if (!strcasecmp("ctime", optarg)) {
1373 ordertype = ORDER_CTIME;
1375 errormsg("invalid value for --order: '%s'\n", optarg);
1380 SETFLAG(flags, F_REVERSE);
/* open the log file right away so that problems are reported early */
1383 loginfo = log_open(logfile=optarg, &log_error);
1386 if (log_error == LOG_ERROR_NOT_A_LOG_FILE)
1387 errormsg("%s: doesn't look like an fdupes log file\n", logfile);
1389 errormsg("%s: could not open log file\n", logfile);
/* device and inode of the log file let grokdir leave it out of the list */
1395 if (stat(logfile, &logfile_status) != 0)
1397 errormsg("could not read log file status\n");
1403 fprintf(stderr, "Try `fdupes --help' for more information.\n");
1408 if (optind >= argc) {
1409 errormsg("no directories specified\n");
1413 if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
1414 errormsg("options --recurse and --recurse: are not compatible\n");
1418 if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
1419 errormsg("options --summarize and --delete are not compatible\n");
1423 if (ISFLAG(flags, F_RECURSEAFTER)) {
/* find the first directory that follows the option, in either spelling */
1424 firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
1426 if (firstrecurse == argc)
1427 firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind);
1429 if (firstrecurse == argc) {
1430 errormsg("-R option must be isolated from other options\n");
1434 /* F_RECURSE is not set for directories before --recurse: */
1435 for (x = optind; x < firstrecurse; x++)
1436 filecount += grokdir(argv[x], &files, &logfile_status);
1438 /* Set F_RECURSE for directories after --recurse: */
1439 SETFLAG(flags, F_RECURSE);
1441 for (x = firstrecurse; x < argc; x++)
1442 filecount += grokdir(argv[x], &files, &logfile_status);
1444 for (x = optind; x < argc; x++)
1445 filecount += grokdir(argv[x], &files, &logfile_status);
/* wipe the "Building file list" progress line */
1449 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
/* matching phase: look each file up in the search tree */
1457 registerfile(&checktree, curfile);
1459 match = checkmatch(&checktree, checktree, curfile);
1461 if (match != NULL) {
/* signatures agree: open both files to confirm with a byte comparison */
1462 file1 = fopen(curfile->d_name, "rb");
1464 curfile = curfile->next;
1468 file2 = fopen((*match)->d_name, "rb");
1471 curfile = curfile->next;
1475 if (confirmmatch(file1, file2)) {
/* immediate mode deletes now; otherwise the pair is recorded for later */
1476 if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_IMMEDIATE))
1477 deletesuccessor(match, curfile,
1478 ordertype == ORDER_MTIME ? sort_pairs_by_mtime :
1479 ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
1480 sort_pairs_by_filename, loginfo );
1482 registerpair(match, curfile,
1483 ordertype == ORDER_MTIME ? sort_pairs_by_mtime :
1484 ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
1485 sort_pairs_by_filename );
1492 curfile = curfile->next;
1494 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1495 fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
1496 (int)((float) progress / (float) filecount * 100.0));
1501 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
/* output phase: delete, summarize or list, depending on the options */
1503 if (ISFLAG(flags, F_DELETEFILES))
1505 if (ISFLAG(flags, F_NOPROMPT))
1507 deletefiles(files, 0, 0, logfile);
1512 if (!ISFLAG(flags, F_PLAINPROMPT))
/* try the screen-mode interface first */
1514 if (newterm(getenv("TERM"), stdout, stdin) != 0)
1516 deletefiles_ncurses(files, logfile);
1520 errormsg("could not enter screen mode; falling back to plain mode\n\n");
1521 SETFLAG(flags, F_PLAINPROMPT);
1525 if (ISFLAG(flags, F_PLAINPROMPT))
/* replies are read from the terminal, not from whatever stdin was */
1527 if (freopen("/dev/tty", "r", stdin) == NULL)
1529 errormsg("could not open terminal for input\n");
1533 deletefiles(files, 1, stdin, logfile);
1536 if (freopen("/dev/tty", "r", stdin) == NULL)
1538 errormsg("could not open terminal for input\n");
1542 deletefiles(files, 1, stdin, logfile);
1549 if (ISFLAG(flags, F_SUMMARIZEMATCHES))
1550 summarizematches(files);
1554 printmatches(files);
/* cleanup: release the file list, the argument copy and the search tree */
1557 curfile = files->next;
1558 free(files->d_name);
1559 free(files->crcsignature);
1560 free(files->crcpartial);
1565 for (x = 0; x < argc; x++)
1570 purgetree(checktree);