1 /* FDUPES Copyright (c) 1999-2018 Adrian Lopez
3 Permission is hereby granted, free of charge, to any person
4 obtaining a copy of this software and associated documentation files
5 (the "Software"), to deal in the Software without restriction,
6 including without limitation the rights to use, copy, modify, merge,
7 publish, distribute, sublicense, and/or sell copies of the Software,
8 and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
39 #ifdef HAVE_NCURSESW_CURSES_H
40 #include <ncursesw/curses.h>
44 #include "ncurses-interface.h"
// Size filters compared against st_size in grokdir().  Both start at -1;
// grokdir() treats a maxsize of -1 as "no upper limit".
52 long long minsize = -1;
53 long long maxsize = -1;
// Ordering used when pairing duplicates; ORDER_MTIME unless the 'o' option
// handler in main() selects another value.
63 ordertype_t ordertype = ORDER_MTIME;
// Size in bytes of the read buffers used for hashing and for the
// byte-for-byte comparison in confirmmatch().
65 #define CHUNK_SIZE 8192
// Initial size of the buffer that receives the user's reply in deletefiles();
// the buffer is grown in INPUT_SIZE steps when the reply is longer.
67 #define INPUT_SIZE 256
// Read limit passed by getcrcpartialsignature() to getcrcsignatureuntil().
69 #define PARTIAL_MD5_SIZE 4096
// Number of bytes in a digest as handled by md5cmp() and md5copy().
71 #define MD5_DIGEST_LENGTH 16
75 TODO: Partial sums (for working with very large files).
// Signature record.  Only the 16-byte digest member is visible in this
// excerpt; other members, if any, are not shown.
77 typedef struct _signature
80 md5_byte_t digest[16];
// Container reached through the `signatures` pointer to signature_t.
// NOTE(review): a count or length member is not visible in this excerpt.
83 typedef struct _signatures
86 signature_t *signatures;
// Binary tree node used while searching for duplicates.  registerfile()
// creates nodes with `file` set and both children NULL; checkmatch() descends
// into `left` or `right` depending on its comparison result.
91 typedef struct _filetree {
93 struct _filetree *left;
94 struct _filetree *right;
// Escapes a file name in place: every character of *filename_ptr that occurs
// in escape_list gets a backslash placed in front of it.
//
// The escaped text is first assembled in a scratch buffer, then *filename_ptr
// is reallocated to the escaped length and the text is copied back.  Any
// allocation failure is reported through errormsg().
// NOTE(review): the termination of tmp, its release, and any condition
// guarding the realloc are on lines not visible in this excerpt.
97 void escapefilename(char *escape_list, char **filename_ptr)
104 filename = *filename_ptr;
// Worst case: every character is escaped (2 bytes each) plus the NUL.
106 tmp = (char*) malloc(strlen(filename) * 2 + 1);
108 errormsg("out of memory!\n");
// NOTE(review): strlen(filename) is re-evaluated on every iteration.
112 for (x = 0, tx = 0; x < strlen(filename); x++) {
113 if (strchr(escape_list, filename[x]) != NULL) tmp[tx++] = '\\';
114 tmp[tx++] = filename[x];
// NOTE(review): the realloc result overwrites *filename_ptr directly, so the
// original allocation is no longer reachable if realloc returns NULL.
120 *filename_ptr = realloc(*filename_ptr, strlen(tmp) + 1);
121 if (*filename_ptr == NULL) {
122 errormsg("out of memory!\n");
125 strcpy(*filename_ptr, tmp);
// Looks up `filename` with stat() and returns a dev_t for it; returns 0 when
// stat() fails.
// NOTE(review): the success return is not visible here — presumably the
// st_dev field of the stat result; confirm.
129 dev_t getdevice(char *filename) {
132 if (stat(filename, &s) != 0) return 0;
// Looks up `filename` with stat() and returns an ino_t for it; returns 0 when
// stat() fails.
// NOTE(review): the success return is not visible here — presumably the
// st_ino field of the stat result; confirm.
137 ino_t getinode(char *filename) {
140 if (stat(filename, &s) != 0) return 0;
// Formats `t` as local time in the form "YYYY-MM-DD HH:MM" and returns the
// text; callers in this file print it with %s.
// NOTE(review): the declaration of buf and the return statement are not
// visible here.  If buf is a static buffer the result is overwritten by the
// next call — confirm before holding on to the pointer.
145 char *fmttime(time_t t) {
148 strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", localtime(&t));
// Makes a deep copy of an argument vector: allocates an array of argc
// pointers and copies each argv[x] into its own allocation of
// strlen(argv[x]) + 1 bytes.  Allocation failures are reported through
// errormsg().  main() stores the result as `oldargv` before option parsing.
// NOTE(review): the return statement is not visible in this excerpt.
153 char **cloneargs(int argc, char **argv)
158 args = (char **) malloc(sizeof(char*) * argc);
160 errormsg("out of memory!\n");
164 for (x = 0; x < argc; x++) {
165 args[x] = (char*) malloc(strlen(argv[x]) + 1);
166 if (args[x] == NULL) {
168 errormsg("out of memory!\n");
172 strcpy(args[x], argv[x]);
// Scans argv[start] .. argv[argc - 1] for an entry that compares equal to
// `arg` with strcmp().
// NOTE(review): the return statements are not visible here.  nonoptafter()
// uses the result as a position in argv, and main() compares results derived
// from it against argc — presumably argc signals "not found"; confirm.
178 int findarg(char *arg, int start, int argc, char **argv)
182 for (x = start; x < argc; x++)
183 if (strcmp(argv[x], arg) == 0)
189 /* Find the first non-option argument after specified option. */
// oldargv is the copy of the argument vector taken before option parsing and
// newargv is the vector after parsing (see the call in main()); optind is the
// index of the first non-option entry in newargv.
//
// The position of `option` in oldargv is found first.  Each non-option entry
// of newargv is then located in oldargv, and the index x of the first one
// whose original position lies after the option is returned.
// NOTE(review): the fall-through return is not visible; main() treats a
// result equal to argc as "no such argument".
190 int nonoptafter(char *option, int argc, char **oldargv,
191 char **newargv, int optind)
198 targetind = findarg(option, 1, argc, oldargv);
200 for (x = optind; x < argc; x++) {
201 testind = findarg(newargv[x], startat, argc, oldargv);
202 if (testind > targetind) return x;
// Continue the next search from the position just found.
203 else startat = testind;
// Copies size, inode, device, ctime and mtime from the stat result `info`
// into `file`.  `linfo` is not read by any of the visible lines.
209 void getfilestats(file_t *file, struct stat *info, struct stat *linfo)
// NOTE(review): the doubled semicolon below is a harmless empty statement.
211 file->size = info->st_size;;
212 file->inode = info->st_ino;
213 file->device = info->st_dev;
214 file->ctime = info->st_ctime;
215 file->mtime = info->st_mtime;
// Scans directory `dir` and links every eligible file into the list at
// *filelistp (new entries are placed at the head).  Callers add the return
// value to their running file count.
//
// An entry is discarded, and its d_name freed, when any of these holds:
//   - F_EXCLUDEHIDDEN is set and its base name starts with '.';
//   - stat() or lstat() on it fails;
//   - it is not a directory and is empty with F_EXCLUDEEMPTY set, smaller
//     than minsize, or larger than maxsize (when maxsize is not -1);
//   - it has the same st_dev and st_ino as *logfile_status;
//   - it is neither a regular file nor a symlink followed via F_FOLLOWLINKS.
// Directories are descended into only when F_RECURSE is set, and only through
// symlinks when F_FOLLOWLINKS is also set.
// NOTE(review): several statements (opening the directory, the paths taken
// after each free, the final return) are not visible in this excerpt.
218 int grokdir(char *dir, file_t **filelistp, struct stat *logfile_status)
222 struct dirent *dirinfo;
// Spinner state is static, so it carries over between calls.
227 static int progress = 0;
228 static char indicator[] = "-\\|/";
229 char *fullname, *name;
234 errormsg("could not chdir to %s\n", dir);
238 while ((dirinfo = readdir(cd)) != NULL) {
// Skip the "." and ".." entries.
239 if (strcmp(dirinfo->d_name, ".") && strcmp(dirinfo->d_name, "..")) {
240 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
241 fprintf(stderr, "\rBuilding file list %c ", indicator[progress]);
242 progress = (progress + 1) % 4;
245 newfile = (file_t*) malloc(sizeof(file_t));
248 errormsg("out of memory!\n");
251 } else newfile->next = *filelistp;
255 newfile->crcsignature = NULL;
256 newfile->crcpartial = NULL;
257 newfile->duplicates = NULL;
258 newfile->hasdupes = 0;
// +2 leaves room for an optional '/' separator and the terminating NUL.
260 newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);
262 if (!newfile->d_name) {
263 errormsg("out of memory!\n");
269 strcpy(newfile->d_name, dir);
270 lastchar = strlen(dir) - 1;
// Add a separator only when dir does not already end in one.
271 if (lastchar >= 0 && dir[lastchar] != '/')
272 strcat(newfile->d_name, "/");
273 strcat(newfile->d_name, dirinfo->d_name);
275 if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
// basename() is given a copy of the path rather than d_name itself.
276 fullname = strdup(newfile->d_name);
279 errormsg("out of memory!\n");
284 name = basename(fullname);
285 if (name[0] == '.' && strcmp(name, ".") && strcmp(name, "..") ) {
286 free(newfile->d_name);
294 if (stat(newfile->d_name, &info) == -1) {
295 free(newfile->d_name);
// Size filters apply to non-directories only.
300 if (!S_ISDIR(info.st_mode) && (((info.st_size == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) || info.st_size < minsize || (info.st_size > maxsize && maxsize != -1)))) {
301 free(newfile->d_name);
// Never list the log file itself.
307 if (info.st_dev == logfile_status->st_dev && info.st_ino == logfile_status->st_ino)
309 free(newfile->d_name);
// lstat() describes the entry itself, so symlinks can be told apart.
314 if (lstat(newfile->d_name, &linfo) == -1) {
315 free(newfile->d_name);
320 if (S_ISDIR(info.st_mode)) {
321 if (ISFLAG(flags, F_RECURSE) && (ISFLAG(flags, F_FOLLOWLINKS) || !S_ISLNK(linfo.st_mode)))
322 filecount += grokdir(newfile->d_name, filelistp, logfile_status);
323 free(newfile->d_name);
326 if (S_ISREG(linfo.st_mode) || (S_ISLNK(linfo.st_mode) && ISFLAG(flags, F_FOLLOWLINKS))) {
327 getfilestats(newfile, &info, &linfo);
328 *filelistp = newfile;
331 free(newfile->d_name);
// Computes an MD5 digest over the contents of `filename`.
//
// fsize is the number of bytes expected in the file.  max_read, when non-zero
// and smaller than fsize, limits how much is hashed (the statement applying
// the limit is not visible here).  Data is read in pieces of at most
// CHUNK_SIZE bytes and fed to md5_append().
//
// The digest and chunk buffers are static.  NOTE(review): the return
// statement is not visible — presumably the static digest is returned, which
// would explain why checkmatch() copies the result with md5copy(); confirm.
// Failures to open or read are reported through errormsg(), and checkmatch()
// treats a NULL result as an error.
343 md5_byte_t *getcrcsignatureuntil(char *filename, off_t fsize, off_t max_read)
347 static md5_byte_t digest[MD5_DIGEST_LENGTH];
348 static md5_byte_t chunk[CHUNK_SIZE];
353 if (max_read != 0 && fsize > max_read)
356 file = fopen(filename, "rb");
358 errormsg("error opening file %s\n", filename);
// Read a full chunk, or whatever remains when less than a chunk is left.
363 toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize;
364 if (fread(chunk, toread, 1, file) != 1) {
365 errormsg("error reading from file %s\n", filename);
369 md5_append(&state, chunk, toread);
373 md5_finish(&state, digest);
// Signature of the whole file: calls getcrcsignatureuntil() with max_read 0,
// which that function treats as "no read limit".
380 md5_byte_t *getcrcsignature(char *filename, off_t fsize)
382 return getcrcsignatureuntil(filename, fsize, 0);
// Signature of the start of the file only: calls getcrcsignatureuntil() with
// a read limit of PARTIAL_MD5_SIZE bytes.
385 md5_byte_t *getcrcpartialsignature(char *filename, off_t fsize)
387 return getcrcsignatureuntil(filename, fsize, PARTIAL_MD5_SIZE);
// Compares two digests byte by byte over MD5_DIGEST_LENGTH bytes.
// NOTE(review): the return statements are not visible here.  checkmatch()
// treats a result of 0 as "equal" and a positive result as "greater".
390 int md5cmp(const md5_byte_t *a, const md5_byte_t *b)
394 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
398 else if (a[x] > b[x])
// Iterates over MD5_DIGEST_LENGTH bytes of `from` and `to`.
// NOTE(review): the loop body is not visible — presumably it copies each byte
// from `from` to `to`.  Callers in checkmatch() allocate MD5_DIGEST_LENGTH
// bytes for `to` beforehand.
405 void md5copy(md5_byte_t *to, const md5_byte_t *from)
409 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
// Recursively visits the left and then the right subtree of `checktree`.
// checktree itself is dereferenced unconditionally, so it must not be NULL.
// NOTE(review): the release of the node itself is not visible in this
// excerpt.
413 void purgetree(filetree_t *checktree)
415 if (checktree->left != NULL) purgetree(checktree->left);
417 if (checktree->right != NULL) purgetree(checktree->right);
// Allocates a new tree node, stores it in *branch, and initializes it to hold
// `file` with no children.  Allocation failure is reported through
// errormsg().
// NOTE(review): the return values are not visible in this excerpt.
422 int registerfile(filetree_t **branch, file_t *file)
424 *branch = (filetree_t*) malloc(sizeof(filetree_t));
425 if (*branch == NULL) {
426 errormsg("out of memory!\n");
430 (*branch)->file = file;
431 (*branch)->left = NULL;
432 (*branch)->right = NULL;
// Returns 1 when name1 and name2 have identical st_mode, st_uid and st_gid,
// 0 when any of them differ, and -1 when stat() fails on either path.
// NOTE(review): checkmatch() tests the result with `!`, so the -1 error value
// is handled the same way as a match — confirm that is intended.
437 int same_permissions(char* name1, char* name2)
441 if (stat(name1, &s1) != 0) return -1;
442 if (stat(name2, &s2) != 0) return -1;
444 return (s1.st_mode == s2.st_mode &&
445 s1.st_uid == s2.st_uid &&
446 s1.st_gid == s2.st_gid);
// Checks whether `file` has the same inode and device as the file stored in
// the tree node `checktree`, or as any file on that file's duplicates chain.
// NOTE(review): the return statements are not visible here; checkmatch()
// uses the result as a truth value.
449 int is_hardlink(filetree_t *checktree, file_t *file)
453 if ((file->inode == checktree->file->inode) &&
454 (file->device == checktree->file->device))
// The chain is walked only when the node's file is flagged as having dupes.
457 if (checktree->file->hasdupes)
459 dupe = checktree->file->duplicates;
462 if ((file->inode == dupe->inode) &&
463 (file->device == dupe->device))
466 dupe = dupe->duplicates;
467 } while (dupe != NULL);
473 /* check whether two paths represent the same file (deleting one would delete the other) */
// The test is done in three stages: same device and inode, same base name,
// and same containing directory (directories are compared by st_dev and
// st_ino rather than by path text).
// NOTE(review): the return statements and the release of the duplicated
// strings are not visible in this excerpt; has_same_file() uses the result as
// a truth value.
474 int is_same_file(file_t *file_a, file_t *file_b)
482 struct stat dirstat_a;
483 struct stat dirstat_b;
485 /* if files on different devices and/or different inodes, they are not the same file */
486 if (file_a->device != file_b->device || file_a->inode != file_b->inode)
489 /* copy filenames (basename and dirname may modify these) */
490 filename_a = strdup(file_a->d_name);
494 filename_b = strdup(file_b->d_name);
498 /* get file basenames */
499 basename_a = basename(filename_a);
// memmove is used because the base name may point into the same buffer.
500 memmove(filename_a, basename_a, strlen(basename_a) + 1);
502 basename_b = basename(filename_b);
503 memmove(filename_b, basename_b, strlen(basename_b) + 1);
505 /* if files have different names, they are not the same file */
506 if (strcmp(filename_a, filename_b) != 0)
// Restore the full paths into the scratch copies before taking dirname().
514 strcpy(filename_a, file_a->d_name);
515 strcpy(filename_b, file_b->d_name);
517 /* get directory names */
518 dirname_a = dirname(filename_a);
519 if (stat(dirname_a, &dirstat_a) != 0)
526 dirname_b = dirname(filename_b);
527 if (stat(dirname_b, &dirstat_b) != 0)
537 /* if directories on which files reside are different, they are not the same file */
538 if (dirstat_a.st_dev != dirstat_b.st_dev || dirstat_a.st_ino != dirstat_b.st_ino)
541 /* same device, inode, filename, and directory; therefore, same file */
545 /* check whether given tree node already contains a copy of given file */
// Applies is_same_file() to the node's own file and, when that file is
// flagged as having dupes, to every file on its duplicates chain.
// NOTE(review): the return statements are not visible in this excerpt.
546 int has_same_file(filetree_t *checktree, file_t *file)
550 if (is_same_file(checktree->file, file))
553 if (checktree->file->hasdupes)
555 dupe = checktree->file->duplicates;
558 if (is_same_file(dupe, file))
561 dupe = dupe->duplicates;
562 } while (dupe != NULL);
// Searches the tree node `checktree` (and, recursively, its children) for a
// file matching `file`, registering `file` in the tree when no match exists.
//
// Files are ordered by size first, then by the signature of their leading
// PARTIAL_MD5_SIZE bytes, and only when those agree by the full-file
// signature.  Signatures are computed on demand and cached in the
// crcpartial / crcsignature members of each file_t, so each file is hashed at
// most once per kind.
//
// When every comparison reports equality the address of the node's file
// pointer is returned; main() then confirms the match byte for byte.
// Otherwise the search continues in the left or right child, or `file` is
// registered there when the child is empty.  A NULL return is visible for
// failed full-signature reads; other return statements are not visible in
// this excerpt.
// With F_CONSIDERHARDLINKS set, has_same_file() is consulted; the visible
// comments indicate hard links are otherwise detected with is_hardlink() and
// not reported as duplicates.
// With F_PERMISSIONS set, same_permissions() is consulted as well.
568 file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
571 md5_byte_t *crcsignature;
573 if (ISFLAG(flags, F_CONSIDERHARDLINKS))
575 /* If node already contains file, we don't want to add it again.
577 if (has_same_file(checktree, file))
582 /* If device and inode fields are equal one of the files is a
583 hard link to the other or the files have been listed twice
584 unintentionally. We don't want to flag these files as
585 duplicates unless the user specifies otherwise.
587 if (is_hardlink(checktree, file))
591 if (file->size < checktree->file->size)
594 if (file->size > checktree->file->size) cmpresult = 1;
596 if (ISFLAG(flags, F_PERMISSIONS) &&
597 !same_permissions(file->d_name, checktree->file->d_name))
600 if (checktree->file->crcpartial == NULL) {
601 crcsignature = getcrcpartialsignature(checktree->file->d_name, checktree->file->size);
602 if (crcsignature == NULL) {
603 errormsg ("cannot read file %s\n", checktree->file->d_name);
607 checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
608 if (checktree->file->crcpartial == NULL) {
609 errormsg("out of memory\n");
612 md5copy(checktree->file->crcpartial, crcsignature);
615 if (file->crcpartial == NULL) {
616 crcsignature = getcrcpartialsignature(file->d_name, file->size);
617 if (crcsignature == NULL) {
618 errormsg ("cannot read file %s\n", file->d_name);
622 file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
623 if (file->crcpartial == NULL) {
624 errormsg("out of memory\n");
627 md5copy(file->crcpartial, crcsignature);
630 cmpresult = md5cmp(file->crcpartial, checktree->file->crcpartial);
631 /*if (cmpresult != 0) errormsg(" on %s vs %s\n", file->d_name, checktree->file->d_name);*/
// Partial signatures agree: fall back to comparing full-file signatures.
633 if (cmpresult == 0) {
634 if (checktree->file->crcsignature == NULL) {
635 crcsignature = getcrcsignature(checktree->file->d_name, checktree->file->size);
636 if (crcsignature == NULL) return NULL;
638 checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
639 if (checktree->file->crcsignature == NULL) {
640 errormsg("out of memory\n");
643 md5copy(checktree->file->crcsignature, crcsignature);
646 if (file->crcsignature == NULL) {
647 crcsignature = getcrcsignature(file->d_name, file->size);
648 if (crcsignature == NULL) return NULL;
650 file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
651 if (file->crcsignature == NULL) {
652 errormsg("out of memory\n");
655 md5copy(file->crcsignature, crcsignature);
658 cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature);
659 /*if (cmpresult != 0) errormsg("P on %s vs %s\n",
660 file->d_name, checktree->file->d_name);
661 else errormsg("P F on %s vs %s\n", file->d_name,
662 checktree->file->d_name);
663 printf("%s matches %s\n", file->d_name, checktree->file->d_name);*/
// Descend into the matching child, or attach the file there if it is empty.
668 if (checktree->left != NULL) {
669 return checkmatch(root, checktree->left, file);
671 registerfile(&(checktree->left), file);
674 } else if (cmpresult > 0) {
675 if (checktree->right != NULL) {
676 return checkmatch(root, checktree->right, file);
678 registerfile(&(checktree->right), file);
// Candidate match: hand back the slot holding the node's file pointer.
683 return &checktree->file;
687 /* Do a bit-for-bit comparison in case two different files produce the
688 same signature. Unlikely, but better safe than sorry. */
// Both streams are rewound to the start, then read in blocks of CHUNK_SIZE
// bytes.  Returns 0 as soon as the two reads differ in length or content.
// NOTE(review): the loop construct and the successful return are not visible
// here; main() treats a non-zero result as a confirmed match.
690 int confirmmatch(FILE *file1, FILE *file2)
692 unsigned char c1[CHUNK_SIZE];
693 unsigned char c2[CHUNK_SIZE];
697 fseek(file1, 0, SEEK_SET);
698 fseek(file2, 0, SEEK_SET);
701 r1 = fread(c1, sizeof(unsigned char), sizeof(c1), file1);
702 r2 = fread(c2, sizeof(unsigned char), sizeof(c2), file2);
704 if (r1 != r2) return 0; /* file lengths are different */
705 if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
// Prints a one-line summary of the duplicates found in the list `files`: the
// number of duplicate files, the number of sets, and the space they occupy,
// or "No duplicates found." when there are none.
// For every entry on a duplicates chain the size of the set's first file is
// added to the byte total.
// NOTE(review): the byte branch switches at 1024 while the kilobyte and
// megabyte figures are computed with decimal multiples of 1000; only the
// byte message ends with a period.
// NOTE(review): the statements that count files and sets are not visible in
// this excerpt.
711 void summarizematches(file_t *files)
714 double numbytes = 0.0;
718 while (files != NULL)
724 tmpfile = files->duplicates;
725 while (tmpfile != NULL)
728 numbytes += files->size;
729 tmpfile = tmpfile->duplicates;
737 printf("No duplicates found.\n\n");
740 if (numbytes < 1024.0)
741 printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", numfiles, numsets, numbytes);
742 else if (numbytes <= (1000.0 * 1000.0))
743 printf("%d duplicate files (in %d sets), occupying %.1f kilobytes\n\n", numfiles, numsets, numbytes / 1000.0);
745 printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", numfiles, numsets, numbytes / (1000.0 * 1000.0));
// Prints every duplicate set in the list `files` to stdout.
//
// For each file flagged hasdupes, the first file is printed unless
// F_OMITFIRST is set, followed by each file on its duplicates chain.
// F_SHOWSIZE adds a size header, F_SHOWTIME prefixes each name with its
// modification time, and F_DSAMELINE separates names with a space instead of
// a newline.
// With F_DSAMELINE, escapefilename() rewrites d_name in place so that
// backslashes and spaces in names are escaped; the stored names stay escaped
// afterwards.
750 void printmatches(file_t *files)
754 while (files != NULL) {
755 if (files->hasdupes) {
756 if (!ISFLAG(flags, F_OMITFIRST)) {
757 if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", (long long int)files->size,
758 (files->size != 1) ? "s " : " ");
759 if (ISFLAG(flags, F_SHOWTIME))
760 printf("%s ", fmttime(files->mtime));
761 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
762 printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
764 tmpfile = files->duplicates;
765 while (tmpfile != NULL) {
766 if (ISFLAG(flags, F_SHOWTIME))
767 printf("%s ", fmttime(tmpfile->mtime));
768 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
769 printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
770 tmpfile = tmpfile->duplicates;
// Suffix inserted into a path by revisefilename().
781 #define REVISE_APPEND "_tmp"
// Builds a new, heap-allocated path from `path` by inserting REVISE_APPEND
// followed by the decimal value of `seq`.  When the path contains a '.', the
// insertion goes before the text following the last '.'; otherwise it is
// appended at the end.  Returns NULL when the result buffer cannot be
// allocated.
// NOTE(review): when the scratch allocation fails, newpath is returned
// although nothing has been written to it yet — callers cannot tell this
// apart from success.  Confirm and consider freeing it and returning NULL.
// NOTE(review): the branch condition, the handling of the '.' in scratch, and
// the release of scratch are on lines not visible in this excerpt.
782 char *revisefilename(char *path, int seq)
789 digits = numdigits(seq);
790 newpath = malloc(strlen(path) + strlen(REVISE_APPEND) + digits + 1);
791 if (!newpath) return newpath;
793 scratch = malloc(strlen(path) + 1);
794 if (!scratch) return newpath;
796 strcpy(scratch, path);
797 dot = strrchr(scratch, '.');
801 sprintf(newpath, "%s%s%d.%s", scratch, REVISE_APPEND, seq, dot + 1);
806 sprintf(newpath, "%s%s%d", path, REVISE_APPEND, seq);
// Creates `newfile` as a hard link to `oldfile`, then verifies that the new
// name resolves to the same device and inode as the old one.  Returns 0 when
// the verification fails.
// NOTE(review): the return values for a failed link() and for success are
// not visible in this excerpt.
814 int relink(char *oldfile, char *newfile)
821 od = getdevice(oldfile);
822 oi = getinode(oldfile);
824 if (link(oldfile, newfile) != 0)
827 /* make sure we're working with the right file (the one we created) */
828 nd = getdevice(newfile);
829 ni = getinode(newfile);
831 if (nd != od || oi != ni)
832 return 0; /* file is not what we expected */
// Goes through every duplicate set in `files`, decides which members to keep,
// and removes the others with remove().
//
// prompt == 0: the first file of each set is kept and the rest are deleted.
// prompt != 0: the user is asked which files to preserve; the reply is read
//              from `tty` as a list of numbers separated by spaces or
//              commas, or "all" / "a"; "q" / "quit" is recognized as well.
//              The question is repeated until at least one file is preserved.
// logfile is handed to log_open(); the outcome for each set is recorded with
// log_begin_set / log_file_remaining / log_file_deleted / log_end_set.
//
// dupelist[] and preserve[] are indexed from 1 up to the number of files in
// the current set.
// NOTE(review): both arrays are allocated with `max` elements and indexed up
// to `counter` inclusive — this relies on max having been increased on a line
// not visible in this excerpt; confirm.
837 void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)
854 struct log_info *loginfo;
// First pass: find the size of the largest set so the arrays can be sized.
860 if (curfile->hasdupes) {
864 tmpfile = curfile->duplicates;
867 tmpfile = tmpfile->duplicates;
870 if (counter > max) max = counter;
873 curfile = curfile->next;
878 dupelist = (file_t**) malloc(sizeof(file_t*) * max);
879 preserve = (int*) malloc(sizeof(int) * max);
880 preservestr = (char*) malloc(INPUT_SIZE);
882 if (!dupelist || !preserve || !preservestr) {
883 errormsg("out of memory\n");
889 loginfo = log_open(logfile, &log_error);
891 register_sigint_handler();
// Second pass: handle one duplicate set per list entry flagged hasdupes.
894 if (files->hasdupes) {
897 dupelist[counter] = files;
901 if (ISFLAG(flags, F_SHOWTIME))
902 printf("[%d] [%s] %s\n", counter, fmttime(files->mtime), files->d_name);
904 printf("[%d] %s\n", counter, files->d_name);
907 tmpfile = files->duplicates;
910 dupelist[++counter] = tmpfile;
913 if (ISFLAG(flags, F_SHOWTIME))
914 printf("[%d] [%s] %s\n", counter, fmttime(tmpfile->mtime), tmpfile->d_name);
916 printf("[%d] %s\n", counter, tmpfile->d_name);
918 tmpfile = tmpfile->duplicates;
921 if (prompt) printf("\n");
923 if (!prompt) /* preserve only the first file */
926 for (x = 2; x <= counter; x++) preserve[x] = 0;
929 else /* prompt for files to preserve */
932 printf("Set %d of %d, preserve files [1 - %d, all, quit]",
933 curgroup, groups, counter);
934 if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", (long long int)files->size,
935 (files->size != 1) ? "s " : " ");
939 if (!fgets(preservestr, INPUT_SIZE, tty))
941 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
942 preservestr[1] = '\0';
959 i = strlen(preservestr) - 1;
// Keep growing the buffer and reading until the whole line has arrived.
961 while (preservestr[i]!='\n'){ /* tail of buffer must be a newline */
963 realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE);
964 if (!tstr) { /* couldn't allocate memory, treat as fatal */
965 errormsg("out of memory!\n");
970 if (!fgets(preservestr + i + 1, INPUT_SIZE, tty))
972 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
973 preservestr[1] = '\0';
976 i = strlen(preservestr)-1;
979 if (strcmp(preservestr, "q\n") == 0 || strcmp(preservestr, "quit\n") == 0)
993 for (x = 1; x <= counter; x++) preserve[x] = 0;
995 token = strtok(preservestr, " ,\n");
997 while (token != NULL) {
998 if (strcasecmp(token, "all") == 0 || strcasecmp(token, "a") == 0)
999 for (x = 0; x <= counter; x++) preserve[x] = 1;
// NOTE(review): the sscanf result is not checked; if `number` is not reset
// before this call (not visible here), a non-numeric token reuses the
// previous value.
1002 sscanf(token, "%d", &number);
1003 if (number > 0 && number <= counter) preserve[number] = 1;
1005 token = strtok(NULL, " ,\n");
1008 for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x];
1009 } while (sum < 1); /* make sure we've preserved at least one file */
1014 log_begin_set(loginfo);
// Report and log each member: [+] kept, [-] deleted, [!] deletion failed.
1016 for (x = 1; x <= counter; x++) {
1019 printf(" [+] %s\n", dupelist[x]->d_name);
1022 log_file_remaining(loginfo, dupelist[x]->d_name);
1025 if (remove(dupelist[x]->d_name) == 0) {
1026 printf(" [-] %s\n", dupelist[x]->d_name);
1029 log_file_deleted(loginfo, dupelist[x]->d_name);
1031 printf(" [!] %s ", dupelist[x]->d_name);
1032 printf("-- unable to delete file!\n");
1035 log_file_remaining(loginfo, dupelist[x]->d_name);
1042 log_end_set(loginfo);
1045 files = files->next;
// Comparison callback that orders files by the order in which they were
// found.  Returns 1 when f2 already has a duplicates chain and -1 otherwise;
// F_REVERSE swaps the sign.  f1 is not read by the visible lines.
1056 int sort_pairs_by_arrival(file_t *f1, file_t *f2)
1058 if (f2->duplicates != 0)
1059 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
1061 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
// Comparison callback on the ctime member: returns -1 when f1 is older than
// f2 and 1 when it is newer; F_REVERSE swaps the sign.
// NOTE(review): the return for equal ctimes is not visible in this excerpt.
1064 int sort_pairs_by_ctime(file_t *f1, file_t *f2)
1066 if (f1->ctime < f2->ctime)
1067 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
1068 else if (f1->ctime > f2->ctime)
1069 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
// Comparison callback on the mtime member: returns -1 when f1 is older than
// f2 and 1 when it is newer; F_REVERSE swaps the sign.  Equal mtimes are
// resolved by sort_pairs_by_ctime().
1074 int sort_pairs_by_mtime(file_t *f1, file_t *f2)
1076 if (f1->mtime < f2->mtime)
1077 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
1078 else if (f1->mtime > f2->mtime)
1079 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
1081 return sort_pairs_by_ctime(f1, f2);
// Comparison callback on d_name: returns the strcmp() result for the two
// names, negated when F_REVERSE is set.
1084 int sort_pairs_by_filename(file_t *f1, file_t *f2)
1086 int strvalue = strcmp(f1->d_name, f2->d_name);
1087 return !ISFLAG(flags, F_REVERSE) ? strvalue : -strvalue;
// Inserts `newmatch` into the duplicates chain that starts at *matchlist,
// keeping the chain ordered according to `comparef`.
//
// newmatch is placed in front of the first chain member it does not compare
// greater than, or at the end of the chain when there is none.  When it lands
// in front of the current head, *matchlist is updated to point at it.  The
// hasdupes flag is kept set on the head of the chain only.
// NOTE(review): the loop construct and several linking statements are not
// visible in this excerpt.
1090 void registerpair(file_t **matchlist, file_t *newmatch,
1091 int (*comparef)(file_t *f1, file_t *f2))
1096 (*matchlist)->hasdupes = 1;
1099 traverse = *matchlist;
1102 if (comparef(newmatch, traverse) <= 0)
1104 newmatch->duplicates = traverse;
1108 *matchlist = newmatch; /* update pointer to head of list */
1110 newmatch->hasdupes = 1;
1111 traverse->hasdupes = 0; /* flag is only for first file in dupe chain */
1114 back->duplicates = newmatch;
// Reached the end of the chain: append.
1120 if (traverse->duplicates == 0)
1122 traverse->duplicates = newmatch;
1125 traverse->hasdupes = 1;
1132 traverse = traverse->duplicates;
// Resolves one confirmed duplicate pair immediately: keeps one of *existing
// and `duplicate`, removes the other with remove(), and reports and logs the
// outcome.
//
// When comparef(duplicate, *existing) is >= 0 the existing file is kept and
// the new duplicate is deleted; otherwise the duplicate is kept, the existing
// file is deleted, and *existing is redirected to the duplicate so later
// comparisons use the surviving file.
// Output markers: [+] kept, [-] deleted, [!] deletion failed (the file is
// then logged as remaining).
1136 void deletesuccessor(file_t **existing, file_t *duplicate,
1137 int (*comparef)(file_t *f1, file_t *f2), struct log_info *loginfo)
1142 if (comparef(duplicate, *existing) >= 0)
1144 to_keep = *existing;
1145 to_delete = duplicate;
1149 to_keep = duplicate;
1150 to_delete = *existing;
1152 *existing = duplicate;
// Blank out the progress line on stderr before printing results.
1155 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
1158 log_begin_set(loginfo);
1160 printf(" [+] %s\n", to_keep->d_name);
1163 log_file_remaining(loginfo, to_keep->d_name);
1165 if (remove(to_delete->d_name) == 0) {
1166 printf(" [-] %s\n", to_delete->d_name);
1169 log_file_deleted(loginfo, to_delete->d_name);
1171 printf(" [!] %s ", to_delete->d_name);
1172 printf("-- unable to delete file!\n");
1175 log_file_remaining(loginfo, to_delete->d_name);
1179 log_end_set(loginfo);
// Prints the usage line and the description of every command-line option to
// stdout.  When HAVE_GETOPT_H is not defined, a note is added that long
// options are unavailable in this build.
// NOTE(review): the function header for these statements is not visible in
// this excerpt.
1186 printf("Usage: fdupes [options] DIRECTORY...\n\n");
1188 /* 0 1 0 2 0 3 0 4 0 5 0 6 0 7 0 8 0
1189 -------"---------|---------|---------|---------|---------|---------|---------|---------|"
1191 printf(" -r --recurse for every directory given follow subdirectories\n");
1192 printf(" encountered within\n");
1193 printf(" -R --recurse: for each directory given after this option follow\n");
1194 printf(" subdirectories encountered within (note the ':' at the\n");
1195 printf(" end of the option, manpage for more details)\n");
1196 printf(" -s --symlinks follow symlinks\n");
1197 printf(" -H --hardlinks normally, when two or more files point to the same\n");
1198 printf(" disk area they are treated as non-duplicates; this\n");
1199 printf(" option will change this behavior\n");
1200 printf(" -G --minsize=SIZE consider only files greater than or equal to SIZE bytes\n");
1201 printf(" -L --maxsize=SIZE consider only files less than or equal to SIZE bytes\n");
1202 printf(" -n --noempty exclude zero-length files from consideration\n");
1203 printf(" -A --nohidden exclude hidden files from consideration\n");
1204 printf(" -f --omitfirst omit the first file in each set of matches\n");
1205 printf(" -1 --sameline list each set of matches on a single line\n");
1206 printf(" -S --size show size of duplicate files\n");
1207 printf(" -t --time show modification time of duplicate files\n");
1208 printf(" -m --summarize summarize dupe information\n");
1209 printf(" -q --quiet hide progress indicator\n");
1210 printf(" -d --delete prompt user for files to preserve and delete all\n");
1211 printf(" others; important: under particular circumstances,\n");
1212 printf(" data may be lost when using this option together\n");
1213 printf(" with -s or --symlinks, or when specifying a\n");
1214 printf(" particular directory more than once; refer to the\n");
1215 printf(" fdupes documentation for additional information\n");
1217 printf(" -P --plain with --delete, use line-based prompt (as with older\n");
1218 printf(" versions of fdupes) instead of screen-mode interface\n");
1220 printf(" -N --noprompt together with --delete, preserve the first file in\n");
1221 printf(" each set of duplicates and delete the rest without\n");
1222 printf(" prompting the user\n");
1223 printf(" -I --immediate delete duplicates as they are encountered, without\n");
1224 printf(" grouping into sets; implies --noprompt\n");
1225 printf(" -p --permissions don't consider files with different owner/group or\n");
1226 printf(" permission bits as duplicates\n");
1227 printf(" -o --order=BY select sort order for output and deleting; by file\n");
1228 printf(" modification time (BY='time'; default), status\n");
1229 printf(" change time (BY='ctime'), or filename (BY='name')\n");
1230 printf(" -i --reverse reverse order while sorting\n");
1231 printf(" -l --log=LOGFILE log file deletion choices to LOGFILE\n");
1232 printf(" -v --version display fdupes version\n");
1233 printf(" -h --help display this help message\n\n");
1234 #ifndef HAVE_GETOPT_H
1235 printf("Note: Long options are not supported in this fdupes build.\n\n");
// Program entry point.
//
// 1. Copies argv with cloneargs() and parses the options into `flags`,
//    minsize, maxsize and ordertype; invalid values are reported through
//    errormsg().
// 2. Rejects incompatible combinations (--recurse with --recurse:, and
//    --summarize with --delete) and requires at least one directory.
// 3. Opens the log file and records its stat() result so grokdir() can skip
//    it.
// 4. Builds the file list with grokdir().  With --recurse:, directories given
//    before the option are scanned before F_RECURSE is set and the remaining
//    ones after.
// 5. Feeds every file to checkmatch(); each candidate match is confirmed
//    byte for byte with confirmmatch() and then either deleted at once
//    (--delete with --immediate) or recorded with registerpair().
// 6. Deletes, summarizes or prints the recorded sets according to the flags,
//    then frees the file list, the cloned arguments and the tree.
// NOTE(review): many statements (case labels, exits, loop headers, closing of
// file1 / file2) are not visible in this excerpt.
1239 int main(int argc, char **argv) {
1244 file_t *files = NULL;
1246 file_t **match = NULL;
1247 filetree_t *checktree = NULL;
1253 struct log_info *loginfo = NULL;
1255 struct stat logfile_status;
1258 #ifdef HAVE_GETOPT_H
1259 static struct option long_options[] =
1261 { "omitfirst", 0, 0, 'f' },
1262 { "recurse", 0, 0, 'r' },
1263 { "recurse:", 0, 0, 'R' },
1264 { "quiet", 0, 0, 'q' },
1265 { "sameline", 0, 0, '1' },
1266 { "size", 0, 0, 'S' },
1267 { "time", 0, 0, 't' },
1268 { "symlinks", 0, 0, 's' },
1269 { "hardlinks", 0, 0, 'H' },
1270 { "minsize", 1, 0, 'G' },
1271 { "maxsize", 1, 0, 'L' },
1272 { "noempty", 0, 0, 'n' },
1273 { "nohidden", 0, 0, 'A' },
1274 { "delete", 0, 0, 'd' },
1275 { "plain", 0, 0, 'P' },
1276 { "version", 0, 0, 'v' },
1277 { "help", 0, 0, 'h' },
1278 { "noprompt", 0, 0, 'N' },
1279 { "immediate", 0, 0, 'I'},
1280 { "summarize", 0, 0, 'm'},
1281 { "summary", 0, 0, 'm' },
1282 { "permissions", 0, 0, 'p' },
1283 { "order", 1, 0, 'o' },
1284 { "reverse", 0, 0, 'i' },
1285 { "log", 1, 0, 'l' },
1288 #define GETOPT getopt_long
1290 #define GETOPT getopt
1293 program_name = argv[0];
1295 setlocale(LC_CTYPE, "");
// Keep a copy of the arguments as given; nonoptafter() needs it later.
1297 oldargv = cloneargs(argc, argv);
1299 while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:"
1300 #ifdef HAVE_GETOPT_H
1301 , long_options, NULL
1306 SETFLAG(flags, F_OMITFIRST);
1309 SETFLAG(flags, F_RECURSE);
1312 SETFLAG(flags, F_RECURSEAFTER);
1315 SETFLAG(flags, F_HIDEPROGRESS);
1318 SETFLAG(flags, F_DSAMELINE);
1321 SETFLAG(flags, F_SHOWSIZE);
1324 SETFLAG(flags, F_SHOWTIME);
1327 SETFLAG(flags, F_FOLLOWLINKS);
1330 SETFLAG(flags, F_CONSIDERHARDLINKS);
// Size arguments must be complete, non-negative decimal numbers.
1333 minsize = strtoll(optarg, &endptr, 10);
1334 if (optarg[0] == '\0' || *endptr != '\0' || minsize < 0)
1336 errormsg("invalid value for --minsize: '%s'\n", optarg);
1341 maxsize = strtoll(optarg, &endptr, 10);
1342 if (optarg[0] == '\0' || *endptr != '\0' || maxsize < 0)
1344 errormsg("invalid value for --maxsize: '%s'\n", optarg);
1349 SETFLAG(flags, F_EXCLUDEEMPTY);
1352 SETFLAG(flags, F_EXCLUDEHIDDEN);
1355 SETFLAG(flags, F_DELETEFILES);
1358 SETFLAG(flags, F_PLAINPROMPT);
1361 printf("fdupes %s\n", VERSION);
1367 SETFLAG(flags, F_NOPROMPT);
1370 SETFLAG(flags, F_IMMEDIATE);
1373 SETFLAG(flags, F_SUMMARIZEMATCHES);
1376 SETFLAG(flags, F_PERMISSIONS);
// The sort order name is matched without regard to case.
1379 if (!strcasecmp("name", optarg)) {
1380 ordertype = ORDER_NAME;
1381 } else if (!strcasecmp("time", optarg)) {
1382 ordertype = ORDER_MTIME;
1383 } else if (!strcasecmp("ctime", optarg)) {
1384 ordertype = ORDER_CTIME;
1386 errormsg("invalid value for --order: '%s'\n", optarg);
1391 SETFLAG(flags, F_REVERSE);
1397 fprintf(stderr, "Try `fdupes --help' for more information.\n");
1402 if (optind >= argc) {
1403 errormsg("no directories specified\n");
1407 if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
1408 errormsg("options --recurse and --recurse: are not compatible\n");
1412 if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
1413 errormsg("options --summarize and --delete are not compatible\n");
1417 if (!ISFLAG(flags, F_DELETEFILES))
1422 loginfo = log_open(logfile, &log_error);
1425 if (log_error == LOG_ERROR_NOT_A_LOG_FILE)
1426 errormsg("%s: doesn't look like an fdupes log file\n", logfile);
1428 errormsg("%s: could not open log file\n", logfile);
1433 if (stat(logfile, &logfile_status) != 0)
1435 errormsg("could not read log file status\n");
1440 if (ISFLAG(flags, F_RECURSEAFTER)) {
// Locate the first directory following the long form, then the short form.
1441 firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
1443 if (firstrecurse == argc)
1444 firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind);
1446 if (firstrecurse == argc) {
1447 errormsg("-R option must be isolated from other options\n");
1451 /* F_RECURSE is not set for directories before --recurse: */
1452 for (x = optind; x < firstrecurse; x++)
1453 filecount += grokdir(argv[x], &files, &logfile_status);
1455 /* Set F_RECURSE for directories after --recurse: */
1456 SETFLAG(flags, F_RECURSE);
1458 for (x = firstrecurse; x < argc; x++)
1459 filecount += grokdir(argv[x], &files, &logfile_status);
1461 for (x = optind; x < argc; x++)
1462 filecount += grokdir(argv[x], &files, &logfile_status);
1466 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
// Matching phase: insert each file into the tree and test candidates.
1474 registerfile(&checktree, curfile);
1476 match = checkmatch(&checktree, checktree, curfile);
1478 if (match != NULL) {
1479 file1 = fopen(curfile->d_name, "rb");
1481 curfile = curfile->next;
1485 file2 = fopen((*match)->d_name, "rb");
1488 curfile = curfile->next;
1492 if (confirmmatch(file1, file2)) {
1493 if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_IMMEDIATE))
1494 deletesuccessor(match, curfile,
1495 ordertype == ORDER_MTIME ? sort_pairs_by_mtime :
1496 ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
1497 sort_pairs_by_filename, loginfo );
1499 registerpair(match, curfile,
1500 ordertype == ORDER_MTIME ? sort_pairs_by_mtime :
1501 ordertype == ORDER_CTIME ? sort_pairs_by_ctime :
1502 sort_pairs_by_filename );
1509 curfile = curfile->next;
1511 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1512 fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
1513 (int)((float) progress / (float) filecount * 100.0));
1518 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
// Output phase: delete, summarize or print, depending on the flags.
1526 if (ISFLAG(flags, F_DELETEFILES))
1528 if (ISFLAG(flags, F_NOPROMPT) || ISFLAG(flags, F_IMMEDIATE))
1530 deletefiles(files, 0, 0, logfile);
1535 if (!ISFLAG(flags, F_PLAINPROMPT))
1537 if (newterm(getenv("TERM"), stdout, stdin) != 0)
1539 deletefiles_ncurses(files, logfile);
1543 errormsg("could not enter screen mode; falling back to plain mode\n\n");
1544 SETFLAG(flags, F_PLAINPROMPT);
1548 if (ISFLAG(flags, F_PLAINPROMPT))
// Plain prompting reads replies from the terminal, not from stdin as given.
1550 if (freopen("/dev/tty", "r", stdin) == NULL)
1552 errormsg("could not open terminal for input\n");
1556 deletefiles(files, 1, stdin, logfile);
1559 if (freopen("/dev/tty", "r", stdin) == NULL)
1561 errormsg("could not open terminal for input\n");
1565 deletefiles(files, 1, stdin, logfile);
1572 if (ISFLAG(flags, F_SUMMARIZEMATCHES))
1573 summarizematches(files);
1577 printmatches(files);
// Cleanup: release each list entry, the cloned arguments and the tree.
1580 curfile = files->next;
1581 free(files->d_name);
1582 free(files->crcsignature);
1583 free(files->crcpartial);
1588 for (x = 0; x < argc; x++)
1593 purgetree(checktree);