1 /* FDUPES Copyright (c) 1999-2018 Adrian Lopez
3 Permission is hereby granted, free of charge, to any person
4 obtaining a copy of this software and associated documentation files
5 (the "Software"), to deal in the Software without restriction,
6 including without limitation the rights to use, copy, modify, merge,
7 publish, distribute, sublicense, and/or sell copies of the Software,
8 and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
/* Prefer the wide-character ncurses header when the build reports that
   it is available. */
39 #ifdef HAVE_NCURSESW_CURSES_H
40 #include <ncursesw/curses.h>
44 #include "ncurses-interface.h"
/* File-size limits; both start at -1. grokdir() treats maxsize == -1 as
   "no upper limit", and main() overwrites them from option arguments. */
52 long long minsize = -1;
53 long long maxsize = -1;
/* Sort order applied to duplicate sets; modification time by default. */
63 ordertype_t ordertype = ORDER_MTIME;
/* Buffer size used when hashing (getcrcsignatureuntil) and when doing the
   byte-for-byte comparison (confirmmatch). */
65 #define CHUNK_SIZE 8192
/* Initial size, and growth increment, of the prompt buffer in deletefiles(). */
67 #define INPUT_SIZE 256
/* Read limit handed to getcrcsignatureuntil() for the partial signature. */
69 #define PARTIAL_MD5_SIZE 4096
/* Number of bytes in a digest as compared/copied by md5cmp()/md5copy(). */
71 #define MD5_DIGEST_LENGTH 16
75 TODO: Partial sums (for working with very large files).
/* Signature record; the visible member is a 16-byte digest. */
77 typedef struct _signature
80 md5_byte_t digest[16];
/* Holder for an array of signature_t (other members are not shown here). */
83 typedef struct _signatures
86 signature_t *signatures;
/* Binary tree node with left/right children; registerfile() also stores a
   file_t pointer in its `file` member. */
91 typedef struct _filetree {
93 struct _filetree *left;
94 struct _filetree *right;
/* Insert a backslash before every character of *filename_ptr that occurs
   in escape_list. The escaped text is built in a scratch buffer and then
   copied back into a reallocated *filename_ptr, so the caller's pointer
   may change.

   escape_list   characters that must be escaped
   filename_ptr  in/out: heap string to escape in place */
97 void escapefilename(char *escape_list, char **filename_ptr)
104 filename = *filename_ptr;
/* Worst case: every character gets a backslash, plus the terminating NUL. */
106 tmp = (char*) malloc(strlen(filename) * 2 + 1);
108 errormsg("out of memory!\n");
/* NOTE(review): strlen(filename) is re-evaluated on every iteration. */
112 for (x = 0, tx = 0; x < strlen(filename); x++) {
113 if (strchr(escape_list, filename[x]) != NULL) tmp[tx++] = '\\';
114 tmp[tx++] = filename[x];
/* NOTE(review): the realloc result overwrites the only copy of the
   original pointer; the failure path below reports out of memory.
   No free(tmp) is visible in this listing -- confirm the scratch
   buffer is released. */
120 *filename_ptr = realloc(*filename_ptr, strlen(tmp) + 1);
121 if (*filename_ptr == NULL) {
122 errormsg("out of memory!\n");
125 strcpy(*filename_ptr, tmp);
/* stat() the named file; yields -1 when stat() fails. The success return
   is not visible here (presumably the st_size field -- confirm). */
129 off_t filesize(char *filename) {
132 if (stat(filename, &s) != 0) return -1;
/* stat() the named file; yields 0 when stat() fails. The success return
   is not visible here (presumably the st_dev field -- confirm). */
137 dev_t getdevice(char *filename) {
140 if (stat(filename, &s) != 0) return 0;
/* stat() the named file; yields 0 when stat() fails. The success return
   is not visible here (presumably the st_ino field -- confirm). */
145 ino_t getinode(char *filename) {
148 if (stat(filename, &s) != 0) return 0;
/* stat() the named file; yields 0 when stat() fails. The success return
   is not visible here (presumably the modification time -- confirm). */
153 time_t getmtime(char *filename) {
156 if (stat(filename, &s) != 0) return 0;
/* stat() the named file; yields 0 when stat() fails. The success return
   is not visible here (presumably the status-change time -- confirm). */
161 time_t getctime(char *filename) {
164 if (stat(filename, &s) != 0) return 0;
/* Format the value from getmtime(filename) as local time in the form
   "YYYY-MM-DD HH:MM" into buf. The declaration of buf and the return
   statement are not visible here; if buf is a static buffer the text is
   overwritten by the next call -- confirm. */
169 char *fmtmtime(char *filename) {
171 time_t t = getmtime(filename);
/* NOTE(review): the localtime() result is passed on without a NULL check. */
173 strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", localtime(&t));
/* Make a deep copy of an argument vector: one array of argc pointers plus
   a separately allocated copy of every string. main() keeps this copy as
   oldargv before option parsing. Allocation failures are reported with
   "out of memory!". */
177 char **cloneargs(int argc, char **argv)
182 args = (char **) malloc(sizeof(char*) * argc);
184 errormsg("out of memory!\n");
188 for (x = 0; x < argc; x++) {
/* +1 for the terminating NUL copied by strcpy below. */
189 args[x] = (char*) malloc(strlen(argv[x]) + 1);
190 if (args[x] == NULL) {
192 errormsg("out of memory!\n");
196 strcpy(args[x], argv[x]);
/* Scan argv[start] .. argv[argc-1] for an exact (strcmp) match of arg.
   The return statements are not visible here; nonoptafter() treats the
   result as a position that can be compared with another result. */
202 int findarg(char *arg, int start, int argc, char **argv)
206 for (x = start; x < argc; x++)
207 if (strcmp(argv[x], arg) == 0)
213 /* Find the first non-option argument after specified option. */
/* oldargv is the argument vector as given; newargv is the vector whose
   non-option arguments start at optind. Each such argument is looked up
   in oldargv, and the index x (into newargv) of the first one located
   after `option` is returned. main() compares the result with argc to
   detect that nothing was found; that return path is not visible here. */
214 int nonoptafter(char *option, int argc, char **oldargv,
215 char **newargv, int optind)
/* Position of the option itself in the original vector. */
222 targetind = findarg(option, 1, argc, oldargv);
224 for (x = optind; x < argc; x++) {
/* Resume each search from startat, which is moved to the previous hit. */
225 testind = findarg(newargv[x], startat, argc, oldargv);
226 if (testind > targetind) return x;
227 else startat = testind;
/* Fill in the size, inode, device and sorttime fields of *file from its
   path (file->d_name). Each helper performs its own stat() call. */
233 void getfilestats(file_t *file)
235 file->size = filesize(file->d_name);
236 file->inode = getinode(file->d_name);
237 file->device = getdevice(file->d_name);
/* sorttime comes from either getctime() or getmtime(); the selecting
   condition is not visible here (presumably ordertype -- confirm). */
242 file->sorttime = getctime(file->d_name);
246 file->sorttime = getmtime(file->d_name);
/* Scan directory `dir` and push every entry that passes the filters below
   onto the front of the list at *filelistp.

   dir             directory to read
   filelistp       in/out: head of the file list being built
   logfile_status  stat data of the log file; entries with the same
                   device and inode are skipped

   The result is added to a running file count by the recursive call
   below and by main(). */
251 int grokdir(char *dir, file_t **filelistp, struct stat *logfile_status)
255 struct dirent *dirinfo;
/* Spinner state is static, so it carries over between calls. */
260 static int progress = 0;
261 static char indicator[] = "-\\|/";
262 char *fullname, *name;
268 errormsg("could not chdir to %s\n", dir);
272 while ((dirinfo = readdir(cd)) != NULL) {
/* Ignore the "." and ".." entries. */
273 if (strcmp(dirinfo->d_name, ".") && strcmp(dirinfo->d_name, "..")) {
274 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
275 fprintf(stderr, "\rBuilding file list %c ", indicator[progress]);
276 progress = (progress + 1) % 4;
/* Allocate and initialise a list node for this entry. */
279 newfile = (file_t*) malloc(sizeof(file_t));
282 errormsg("out of memory!\n");
285 } else newfile->next = *filelistp;
289 newfile->crcsignature = NULL;
290 newfile->crcpartial = NULL;
291 newfile->duplicates = NULL;
292 newfile->hasdupes = 0;
/* +2: room for a '/' separator and the terminating NUL. */
294 newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);
296 if (!newfile->d_name) {
297 errormsg("out of memory!\n");
/* Build "dir/name", adding the '/' only when dir does not end in one. */
303 strcpy(newfile->d_name, dir);
304 lastchar = strlen(dir) - 1;
305 if (lastchar >= 0 && dir[lastchar] != '/')
306 strcat(newfile->d_name, "/");
307 strcat(newfile->d_name, dirinfo->d_name);
/* With F_EXCLUDEHIDDEN, drop entries whose base name starts with '.'.
   basename() is given a duplicate of the path, not the stored name. */
309 if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
310 fullname = strdup(newfile->d_name);
313 errormsg("out of memory!\n");
318 name = basename(fullname);
319 if (name[0] == '.' && strcmp(name, ".") && strcmp(name, "..") ) {
320 free(newfile->d_name);
/* Entries that cannot be stat()ed are dropped. */
327 if (stat(newfile->d_name, &info) == -1) {
328 free(newfile->d_name);
/* Size filters apply to non-directories only: empty files when
   F_EXCLUDEEMPTY is set, files below minsize, and files above maxsize
   unless maxsize is -1.
   NOTE(review): filesize() performs a second stat() on the same path. */
333 size = filesize(newfile->d_name);
334 if (!S_ISDIR(info.st_mode) && (((size == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) || size < minsize || (size > maxsize && maxsize != -1)))) {
335 free(newfile->d_name);
/* Skip the entry that is the log file itself (same device and inode). */
340 if (info.st_dev == logfile_status->st_dev && info.st_ino == logfile_status->st_ino)
342 free(newfile->d_name);
/* lstat() does not follow symlinks, so linfo describes the link itself. */
347 if (lstat(newfile->d_name, &linfo) == -1) {
348 free(newfile->d_name);
/* Directories are descended into only with F_RECURSE, and through a
   symlink only with F_FOLLOWLINKS; the directory's own name is freed. */
353 if (S_ISDIR(info.st_mode)) {
354 if (ISFLAG(flags, F_RECURSE) && (ISFLAG(flags, F_FOLLOWLINKS) || !S_ISLNK(linfo.st_mode)))
355 filecount += grokdir(newfile->d_name, filelistp, logfile_status);
356 free(newfile->d_name);
/* Keep regular files, and symlinks when F_FOLLOWLINKS is set; the kept
   node becomes the new list head. */
359 if (S_ISREG(linfo.st_mode) || (S_ISLNK(linfo.st_mode) && ISFLAG(flags, F_FOLLOWLINKS))) {
360 getfilestats(newfile);
361 *filelistp = newfile;
364 free(newfile->d_name);
/* Compute an MD5 digest over the start of a file.

   filename  file to hash
   max_read  upper bound on the number of bytes hashed; 0 means no bound

   digest and chunk are static, so the function is not reentrant and the
   digest contents are replaced by the next call; checkmatch() copies the
   result with md5copy(). The return statements are not visible here. */
376 md5_byte_t *getcrcsignatureuntil(char *filename, off_t max_read)
381 static md5_byte_t digest[MD5_DIGEST_LENGTH];
382 static md5_byte_t chunk[CHUNK_SIZE];
388 fsize = filesize(filename);
/* A non-zero max_read smaller than the file size limits the amount read. */
390 if (max_read != 0 && fsize > max_read)
393 file = fopen(filename, "rb");
395 errormsg("error opening file %s\n", filename);
/* Read at most CHUNK_SIZE bytes per step and feed them to the MD5 state. */
400 toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize;
401 if (fread(chunk, toread, 1, file) != 1) {
402 errormsg("error reading from file %s\n", filename);
406 md5_append(&state, chunk, toread);
410 md5_finish(&state, digest);
/* Digest of the whole file: getcrcsignatureuntil() with no read limit (0). */
417 md5_byte_t *getcrcsignature(char *filename)
419 return getcrcsignatureuntil(filename, 0);
/* Digest of at most the first PARTIAL_MD5_SIZE bytes of the file. */
422 md5_byte_t *getcrcpartialsignature(char *filename)
424 return getcrcsignatureuntil(filename, PARTIAL_MD5_SIZE);
/* Compare two digests byte by byte over MD5_DIGEST_LENGTH bytes. The
   return values are not visible here; checkmatch() treats 0 as equal and
   uses the sign to choose a tree branch. */
427 int md5cmp(const md5_byte_t *a, const md5_byte_t *b)
431 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
435 else if (a[x] > b[x])
/* Loop over the MD5_DIGEST_LENGTH bytes of a digest; the loop body is not
   visible here (presumably to[x] = from[x] -- confirm). */
442 void md5copy(md5_byte_t *to, const md5_byte_t *from)
446 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
/* Recurse into the left and then the right subtree of checktree. The
   statement that releases the node itself is not visible here. checktree
   is dereferenced without a NULL check. */
450 void purgetree(filetree_t *checktree)
452 if (checktree->left != NULL) purgetree(checktree->left);
454 if (checktree->right != NULL) purgetree(checktree->right);
/* Allocate a new tree node at *branch that refers to `file` and has no
   children. An allocation failure is reported with "out of memory!".
   The return value is not visible here. */
459 int registerfile(filetree_t **branch, file_t *file)
461 *branch = (filetree_t*) malloc(sizeof(filetree_t));
462 if (*branch == NULL) {
463 errormsg("out of memory!\n");
467 (*branch)->file = file;
468 (*branch)->left = NULL;
469 (*branch)->right = NULL;
/* Return non-zero when both files have identical st_mode, st_uid and
   st_gid, 0 when any of them differs, and -1 when either stat() fails.
   NOTE(review): checkmatch() tests the result with `!`, so the -1 error
   value is handled like "same permissions" -- confirm that is intended. */
474 int same_permissions(char* name1, char* name2)
478 if (stat(name1, &s1) != 0) return -1;
479 if (stat(name2, &s2) != 0) return -1;
481 return (s1.st_mode == s2.st_mode &&
482 s1.st_uid == s2.st_uid &&
483 s1.st_gid == s2.st_gid);
/* Check whether `file` shares inode and device with the file stored in
   the tree node or with any entry of that file's duplicates chain. The
   return statements are not visible here. */
486 int is_hardlink(filetree_t *checktree, file_t *file)
/* First the node's own file ... */
490 if ((file->inode == checktree->file->inode) &&
491 (file->device == checktree->file->device))
/* ... then every file linked through ->duplicates. */
494 if (checktree->file->hasdupes)
496 dupe = checktree->file->duplicates;
499 if ((file->inode == dupe->inode) &&
500 (file->device == dupe->device))
503 dupe = dupe->duplicates;
504 } while (dupe != NULL);
510 /* check whether two paths represent the same file (deleting one would delete the other) */
/* The test proceeds in three steps: device/inode, base name, and the
   device/inode of the containing directories. The return statements and
   the release of the two path copies are not visible here. */
511 int is_same_file(file_t *file_a, file_t *file_b)
519 struct stat dirstat_a;
520 struct stat dirstat_b;
522 /* if files on different devices and/or different inodes, they are not the same file */
523 if (file_a->device != file_b->device || file_a->inode != file_b->inode)
526 /* copy filenames (basename and dirname may modify these) */
527 filename_a = strdup(file_a->d_name);
531 filename_b = strdup(file_b->d_name);
535 /* get file basenames */
/* The base name is moved to the start of its own buffer so the two
   buffers can be compared directly. */
536 basename_a = basename(filename_a);
537 memmove(filename_a, basename_a, strlen(basename_a) + 1);
539 basename_b = basename(filename_b);
540 memmove(filename_b, basename_b, strlen(basename_b) + 1);
542 /* if files have different names, they are not the same file */
543 if (strcmp(filename_a, filename_b) != 0)
/* Restore the full paths; the buffers were strdup()ed from these same
   strings, so they are large enough. */
551 strcpy(filename_a, file_a->d_name);
552 strcpy(filename_b, file_b->d_name);
554 /* get directory names */
555 dirname_a = dirname(filename_a);
556 if (stat(dirname_a, &dirstat_a) != 0)
563 dirname_b = dirname(filename_b);
564 if (stat(dirname_b, &dirstat_b) != 0)
574 /* if directories on which files reside are different, they are not the same file */
575 if (dirstat_a.st_dev != dirstat_b.st_dev || dirstat_a.st_ino != dirstat_b.st_ino)
578 /* same device, inode, filename, and directory; therefore, same file */
582 /* check whether given tree node already contains a copy of given file */
/* Applies is_same_file() to the node's own file and then to each entry
   of its duplicates chain. The return statements are not visible here. */
583 int has_same_file(filetree_t *checktree, file_t *file)
587 if (is_same_file(checktree->file, file))
590 if (checktree->file->hasdupes)
592 dupe = checktree->file->duplicates;
595 if (is_same_file(dupe, file))
598 dupe = dupe->duplicates;
599 } while (dupe != NULL);
/* Look for a file in the tree rooted at checktree that matches `file`.

   The visible comparisons are, in order: a same-file test (when
   F_CONSIDERHARDLINKS is set) or a hard-link test, file size, permissions
   (when F_PERMISSIONS is set), the partial MD5 signature, and finally the
   full MD5 signature. Signatures are computed on demand and cached in
   the file_t records (crcpartial / crcsignature).

   Depending on cmpresult the search continues in the left or right
   subtree, or `file` is registered as a new leaf there. When everything
   compares equal, the address of the matching node's file pointer is
   returned. NULL is returned when a full signature cannot be computed;
   other return paths are not visible in this listing.

   NOTE(review): the size is re-read with filesize() although
   getfilestats() already stores a size in the file_t record. */
605 file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
608 md5_byte_t *crcsignature;
611 if (ISFLAG(flags, F_CONSIDERHARDLINKS))
613 /* If node already contains file, we don't want to add it again.
615 if (has_same_file(checktree, file))
620 /* If device and inode fields are equal one of the files is a
621 hard link to the other or the files have been listed twice
622 unintentionally. We don't want to flag these files as
623 duplicates unless the user specifies otherwise.
625 if (is_hardlink(checktree, file))
629 fsize = filesize(file->d_name);
631 if (fsize < checktree->file->size)
634 if (fsize > checktree->file->size) cmpresult = 1;
636 if (ISFLAG(flags, F_PERMISSIONS) &&
637 !same_permissions(file->d_name, checktree->file->d_name))
640 if (checktree->file->crcpartial == NULL) {
641 crcsignature = getcrcpartialsignature(checktree->file->d_name);
642 if (crcsignature == NULL) {
643 errormsg ("cannot read file %s\n", checktree->file->d_name);
647 checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
648 if (checktree->file->crcpartial == NULL) {
649 errormsg("out of memory\n");
652 md5copy(checktree->file->crcpartial, crcsignature);
655 if (file->crcpartial == NULL) {
656 crcsignature = getcrcpartialsignature(file->d_name);
657 if (crcsignature == NULL) {
658 errormsg ("cannot read file %s\n", file->d_name);
662 file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
663 if (file->crcpartial == NULL) {
664 errormsg("out of memory\n");
667 md5copy(file->crcpartial, crcsignature);
670 cmpresult = md5cmp(file->crcpartial, checktree->file->crcpartial);
671 /*if (cmpresult != 0) errormsg(" on %s vs %s\n", file->d_name, checktree->file->d_name);*/
/* Partial signatures agree: fall back to the full-file signature, which
   is computed once per file and cached in crcsignature. */
673 if (cmpresult == 0) {
674 if (checktree->file->crcsignature == NULL) {
675 crcsignature = getcrcsignature(checktree->file->d_name);
676 if (crcsignature == NULL) return NULL;
678 checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
679 if (checktree->file->crcsignature == NULL) {
680 errormsg("out of memory\n");
683 md5copy(checktree->file->crcsignature, crcsignature);
686 if (file->crcsignature == NULL) {
687 crcsignature = getcrcsignature(file->d_name);
688 if (crcsignature == NULL) return NULL;
690 file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
691 if (file->crcsignature == NULL) {
692 errormsg("out of memory\n");
695 md5copy(file->crcsignature, crcsignature);
698 cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature);
699 /*if (cmpresult != 0) errormsg("P on %s vs %s\n",
700 file->d_name, checktree->file->d_name);
701 else errormsg("P F on %s vs %s\n", file->d_name,
702 checktree->file->d_name);
703 printf("%s matches %s\n", file->d_name, checktree->file->d_name);*/
/* Descend into the existing child, or attach `file` as a new leaf. */
708 if (checktree->left != NULL) {
709 return checkmatch(root, checktree->left, file);
711 registerfile(&(checktree->left), file);
714 } else if (cmpresult > 0) {
715 if (checktree->right != NULL) {
716 return checkmatch(root, checktree->right, file);
718 registerfile(&(checktree->right), file);
/* Neither smaller nor larger: this node's file is the match. */
723 return &checktree->file;
727 /* Do a bit-for-bit comparison in case two different files produce the
728 same signature. Unlikely, but better safe than sorry. */
/* Both streams are rewound and then read in blocks of CHUNK_SIZE bytes.
   0 is returned as soon as a block differs in length or content; the
   loop construct and the success return are not visible here. */
730 int confirmmatch(FILE *file1, FILE *file2)
732 unsigned char c1[CHUNK_SIZE];
733 unsigned char c2[CHUNK_SIZE];
737 fseek(file1, 0, SEEK_SET);
738 fseek(file2, 0, SEEK_SET);
741 r1 = fread(c1, sizeof(unsigned char), sizeof(c1), file1);
742 r2 = fread(c2, sizeof(unsigned char), sizeof(c2), file2);
744 if (r1 != r2) return 0; /* file lengths are different */
745 if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
/* Print a one-line summary of the duplicates found in `files`: number of
   duplicate files, number of sets, and the space they occupy. */
751 void summarizematches(file_t *files)
754 double numbytes = 0.0;
758 while (files != NULL)
/* Every entry on a duplicates chain adds the size of the set's first file. */
764 tmpfile = files->duplicates;
765 while (tmpfile != NULL)
768 numbytes += files->size;
769 tmpfile = tmpfile->duplicates;
777 printf("No duplicates found.\n\n");
/* NOTE(review): the byte threshold is 1024 while the kilobyte/megabyte
   figures use multiples of 1000, so 1000..1023 bytes are shown as bytes. */
780 if (numbytes < 1024.0)
781 printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", numfiles, numsets, numbytes);
782 else if (numbytes <= (1000.0 * 1000.0))
783 printf("%d duplicate files (in %d sets), occupying %.1f kilobytes\n\n", numfiles, numsets, numbytes / 1000.0);
785 printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", numfiles, numsets, numbytes / (1000.0 * 1000.0));
/* Print every duplicate set in `files`. The first file of a set is
   omitted with F_OMITFIRST; F_SHOWSIZE adds a size line, F_SHOWTIME
   prefixes each name with its modification time, and F_DSAMELINE puts a
   set on one line separated by spaces.
   With F_DSAMELINE the stored d_name strings are modified in place:
   escapefilename() adds backslashes before '\' and ' '. */
790 void printmatches(file_t *files)
794 while (files != NULL) {
795 if (files->hasdupes) {
796 if (!ISFLAG(flags, F_OMITFIRST)) {
797 if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", (long long int)files->size,
798 (files->size != 1) ? "s " : " ");
799 if (ISFLAG(flags, F_SHOWTIME))
800 printf("%s ", fmtmtime(files->d_name));
801 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
802 printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
/* Remaining members of the set, following the duplicates chain. */
804 tmpfile = files->duplicates;
805 while (tmpfile != NULL) {
806 if (ISFLAG(flags, F_SHOWTIME))
807 printf("%s ", fmtmtime(tmpfile->d_name));
808 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
809 printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
810 tmpfile = tmpfile->duplicates;
/* Suffix inserted into revised file names, followed by a sequence number. */
821 #define REVISE_APPEND "_tmp"
/* Build a new heap-allocated path from `path`, REVISE_APPEND and `seq`.
   When a '.' is searched for and used (first sprintf), the suffix goes
   before the text that follows the last '.'; otherwise it is appended to
   the whole path (second sprintf). The condition choosing between the
   two and the final return are not visible here. */
822 char *revisefilename(char *path, int seq)
829 digits = numdigits(seq);
830 newpath = malloc(strlen(path) + strlen(REVISE_APPEND) + digits + 1);
831 if (!newpath) return newpath;
833 scratch = malloc(strlen(path) + 1);
/* NOTE(review): when this allocation fails, newpath is returned without
   anything visibly written to it -- confirm callers cope with that. */
834 if (!scratch) return newpath;
836 strcpy(scratch, path);
837 dot = strrchr(scratch, '.');
/* presumably scratch is cut at `dot` on a line not shown -- confirm */
841 sprintf(newpath, "%s%s%d.%s", scratch, REVISE_APPEND, seq, dot + 1);
846 sprintf(newpath, "%s%s%d", path, REVISE_APPEND, seq);
/* Create a hard link `newfile` to `oldfile` and verify the result by
   comparing device and inode of both names. 0 is returned when the new
   name does not refer to the expected file; the other return values are
   not visible here. */
854 int relink(char *oldfile, char *newfile)
/* Identity of the source, recorded before linking. */
861 od = getdevice(oldfile);
862 oi = getinode(oldfile);
864 if (link(oldfile, newfile) != 0)
867 /* make sure we're working with the right file (the one we created) */
868 nd = getdevice(newfile);
869 ni = getinode(newfile);
871 if (nd != od || oi != ni)
872 return 0; /* file is not what we expected */
/* Go through every duplicate set in `files`, decide which members to
   preserve, and remove() the others.

   files    head of the file list
   prompt   0: keep only the first file of each set without asking;
            non-zero: ask on `tty` which files to preserve
   tty      stream the answers are read from
   logfile  passed to log_open(); decisions are reported through the
            log_* calls below

   dupelist[] and preserve[] are indexed from 1 within a set. */
877 void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)
894 struct log_info *loginfo;
/* First pass: find the largest set so the work arrays can be sized. */
900 if (curfile->hasdupes) {
904 tmpfile = curfile->duplicates;
907 tmpfile = tmpfile->duplicates;
910 if (counter > max) max = counter;
913 curfile = curfile->next;
/* NOTE(review): both arrays hold `max` elements but are indexed from 1
   up to `counter` inclusive; whether `max` already allows for that is
   not visible here -- confirm. */
918 dupelist = (file_t**) malloc(sizeof(file_t*) * max);
919 preserve = (int*) malloc(sizeof(int) * max);
920 preservestr = (char*) malloc(INPUT_SIZE);
922 if (!dupelist || !preserve || !preservestr) {
923 errormsg("out of memory\n");
929 loginfo = log_open(logfile, &log_error);
931 register_sigint_handler();
/* Second pass: list each set, numbering its members. */
934 if (files->hasdupes) {
937 dupelist[counter] = files;
941 if (ISFLAG(flags, F_SHOWTIME))
942 printf("[%d] [%s] %s\n", counter, fmtmtime(files->d_name), files->d_name);
944 printf("[%d] %s\n", counter, files->d_name);
947 tmpfile = files->duplicates;
950 dupelist[++counter] = tmpfile;
953 if (ISFLAG(flags, F_SHOWTIME))
954 printf("[%d] [%s] %s\n", counter, fmtmtime(tmpfile->d_name), tmpfile->d_name);
956 printf("[%d] %s\n", counter, tmpfile->d_name);
958 tmpfile = tmpfile->duplicates;
961 if (prompt) printf("\n");
963 if (!prompt) /* preserve only the first file */
966 for (x = 2; x <= counter; x++) preserve[x] = 0;
969 else /* prompt for files to preserve */
972 printf("Set %d of %d, preserve files [1 - %d, all, quit]",
973 curgroup, groups, counter);
974 if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", (long long int)files->size,
975 (files->size != 1) ? "s " : " ");
979 if (!fgets(preservestr, INPUT_SIZE, tty))
981 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
982 preservestr[1] = '\0';
/* Keep reading, growing the buffer by INPUT_SIZE each time, until the
   input ends with a newline. */
999 i = strlen(preservestr) - 1;
1001 while (preservestr[i]!='\n'){ /* tail of buffer must be a newline */
1003 realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE);
1004 if (!tstr) { /* couldn't allocate memory, treat as fatal */
1005 errormsg("out of memory!\n");
1010 if (!fgets(preservestr + i + 1, INPUT_SIZE, tty))
1012 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
1013 preservestr[1] = '\0';
1016 i = strlen(preservestr)-1;
1019 if (strcmp(preservestr, "q\n") == 0 || strcmp(preservestr, "quit\n") == 0)
/* Parse the answer: tokens are separated by space, comma or newline and
   are either "all"/"a" or a file number within 1..counter. */
1033 for (x = 1; x <= counter; x++) preserve[x] = 0;
1035 token = strtok(preservestr, " ,\n");
1037 while (token != NULL) {
1038 if (strcasecmp(token, "all") == 0 || strcasecmp(token, "a") == 0)
/* NOTE(review): this loop starts at index 0, the others start at 1. */
1039 for (x = 0; x <= counter; x++) preserve[x] = 1;
1042 sscanf(token, "%d", &number);
1043 if (number > 0 && number <= counter) preserve[number] = 1;
1045 token = strtok(NULL, " ,\n");
1048 for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x];
1049 } while (sum < 1); /* make sure we've preserved at least one file */
/* Apply the decision: report preserved files, delete the rest, and log
   each outcome. A file that cannot be deleted is logged as remaining. */
1054 log_begin_set(loginfo);
1056 for (x = 1; x <= counter; x++) {
1059 printf(" [+] %s\n", dupelist[x]->d_name);
1062 log_file_remaining(loginfo, dupelist[x]->d_name);
1065 if (remove(dupelist[x]->d_name) == 0) {
1066 printf(" [-] %s\n", dupelist[x]->d_name);
1069 log_file_deleted(loginfo, dupelist[x]->d_name);
1071 printf(" [!] %s ", dupelist[x]->d_name);
1072 printf("-- unable to delete file!\n");
1075 log_file_remaining(loginfo, dupelist[x]->d_name);
1082 log_end_set(loginfo);
1085 files = files->next;
/* Comparison callback with the same signature as the other sort_pairs_*
   functions. The result depends only on whether f2 has a non-null
   duplicates pointer: 1 if it has, -1 otherwise; F_REVERSE swaps the
   two values. f1 is not read in the visible lines. */
1096 int sort_pairs_by_arrival(file_t *f1, file_t *f2)
1098 if (f2->duplicates != 0)
1099 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
1101 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
/* Comparison callback ordering two files by their sorttime field:
   -1 when f1 is older, 1 when f1 is newer; F_REVERSE swaps the two
   values. The result for equal times is not visible here. */
1104 int sort_pairs_by_time(file_t *f1, file_t *f2)
1106 if (f1->sorttime < f2->sorttime)
1107 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
1108 else if (f1->sorttime > f2->sorttime)
1109 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
/* Comparison callback ordering two files by strcmp() of their d_name
   paths; the sign is inverted when F_REVERSE is set. */
1114 int sort_pairs_by_filename(file_t *f1, file_t *f2)
1116 int strvalue = strcmp(f1->d_name, f2->d_name);
1117 return !ISFLAG(flags, F_REVERSE) ? strvalue : -strvalue;
/* Insert newmatch into the duplicates chain headed by *matchlist, at the
   position chosen by comparef.

   matchlist  in/out: head of the chain; replaced when newmatch sorts first
   newmatch   file to insert
   comparef   ordering callback; a result <= 0 places newmatch before the
              entry it was compared with

   Only the head of a chain carries hasdupes = 1. */
1120 void registerpair(file_t **matchlist, file_t *newmatch,
1121 int (*comparef)(file_t *f1, file_t *f2))
1126 (*matchlist)->hasdupes = 1;
1129 traverse = *matchlist;
/* Insert in front of the first entry that newmatch does not sort after. */
1132 if (comparef(newmatch, traverse) <= 0)
1134 newmatch->duplicates = traverse;
1138 *matchlist = newmatch; /* update pointer to head of list */
1140 newmatch->hasdupes = 1;
1141 traverse->hasdupes = 0; /* flag is only for first file in dupe chain */
1144 back->duplicates = newmatch;
/* End of chain reached: append newmatch. */
1150 if (traverse->duplicates == 0)
1152 traverse->duplicates = newmatch;
1155 traverse->hasdupes = 1;
1162 traverse = traverse->duplicates;
/* Given a file already recorded (*existing) and a newly found duplicate,
   keep one and delete the other straight away.

   existing   in/out: recorded file; set to `duplicate` when that is kept
   duplicate  newly found matching file
   comparef   ordering callback; a result >= 0 for (duplicate, *existing)
              keeps *existing and deletes `duplicate`
   loginfo    log handle receiving the outcome */
1166 void deletesuccessor(file_t **existing, file_t *duplicate,
1167 int (*comparef)(file_t *f1, file_t *f2), struct log_info *loginfo)
1172 if (comparef(duplicate, *existing) >= 0)
1174 to_keep = *existing;
1175 to_delete = duplicate;
1179 to_keep = duplicate;
1180 to_delete = *existing;
1182 *existing = duplicate;
/* Blank out the progress line on stderr before printing results. */
1185 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
1187 printf(" [+] %s\n", to_keep->d_name);
1190 log_file_remaining(loginfo, to_keep->d_name);
/* A file that cannot be removed is reported and logged as remaining. */
1192 if (remove(to_delete->d_name) == 0) {
1193 printf(" [-] %s\n", to_delete->d_name);
1196 log_file_deleted(loginfo, to_delete->d_name);
1198 printf(" [!] %s ", to_delete->d_name);
1199 printf("-- unable to delete file!\n");
1202 log_file_remaining(loginfo, to_delete->d_name);
/* Usage text written to stdout, one printf per output line. The header
   of the enclosing function is not visible in this listing. */
1210 printf("Usage: fdupes [options] DIRECTORY...\n\n");
1212 printf(" -r --recurse \tfor every directory given follow subdirectories\n");
1213 printf(" \tencountered within\n");
1214 printf(" -R --recurse: \tfor each directory given after this option follow\n");
1215 printf(" \tsubdirectories encountered within (note the ':' at\n");
1216 printf(" \tthe end of the option, manpage for more details)\n");
1217 printf(" -s --symlinks \tfollow symlinks\n");
1218 printf(" -H --hardlinks \tnormally, when two or more files point to the same\n");
1219 printf(" \tdisk area they are treated as non-duplicates; this\n");
1220 printf(" \toption will change this behavior\n");
1221 printf(" -G --minsize=SIZE\tconsider only files greater than or equal to SIZE\n");
1222 printf(" -L --maxsize=SIZE\tconsider only files less than or equal to SIZE\n");
1223 printf(" -n --noempty \texclude zero-length files from consideration\n");
1224 printf(" -A --nohidden \texclude hidden files from consideration\n");
1225 printf(" -f --omitfirst \tomit the first file in each set of matches\n");
1226 printf(" -1 --sameline \tlist each set of matches on a single line\n");
1227 printf(" -S --size \tshow size of duplicate files\n");
1228 printf(" -t --time \tshow modification time of duplicate files\n");
1229 printf(" -m --summarize \tsummarize dupe information\n");
1230 printf(" -q --quiet \thide progress indicator\n");
1231 printf(" -d --delete \tprompt user for files to preserve and delete all\n");
1232 printf(" \tothers; important: under particular circumstances,\n");
1233 printf(" \tdata may be lost when using this option together\n");
1234 printf(" \twith -s or --symlinks, or when specifying a\n");
1235 printf(" \tparticular directory more than once; refer to the\n");
1236 printf(" \tfdupes documentation for additional information\n");
1238 printf(" -P --plain \twith --delete, use line-based prompt (as with older\n");
1239 printf(" \tversions of fdupes) instead of screen-mode interface\n");
1241 printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n");
1242 printf(" \teach set of duplicates and delete the rest without\n");
1243 printf(" \tprompting the user\n");
1244 printf(" -I --immediate \tdelete duplicates as they are encountered, without\n");
1245 printf(" \tgrouping into sets; implies --noprompt\n");
1246 printf(" -p --permissions \tdon't consider files with different owner/group or\n");
1247 printf(" \tpermission bits as duplicates\n");
1248 printf(" -o --order=BY \tselect sort order for output and deleting; by file\n");
1249 printf(" \tmodification time (BY='time'; default), status\n");
1250 printf(" \tchange time (BY='ctime'), or filename (BY='name')\n");
1251 printf(" -i --reverse \treverse order while sorting\n");
1252 printf(" -l --log=LOGFILE \tlog file deletion choices to LOGFILE\n");
1253 printf(" -v --version \tdisplay fdupes version\n");
1254 printf(" -h --help \tdisplay this help message\n\n");
/* Extra note for builds that lack getopt.h. */
1255 #ifndef HAVE_GETOPT_H
1256 printf("Note: Long options are not supported in this fdupes build.\n\n");
/* Program entry point: parse options, build the file list with grokdir(),
   find duplicates with checkmatch()/confirmmatch(), then delete,
   summarize or print the result depending on the flags, and finally
   release the file list, the argument copy and the search tree. */
1260 int main(int argc, char **argv) {
1265 file_t *files = NULL;
1267 file_t **match = NULL;
1268 filetree_t *checktree = NULL;
1274 struct log_info *loginfo;
/* NOTE(review): no initialisation of logfile_status is visible for runs
   without a log file, yet its address is always passed to grokdir(),
   which reads st_dev/st_ino from it -- confirm. */
1276 struct stat logfile_status;
/* Long-option table: each entry maps to a short option letter. */
1279 #ifdef HAVE_GETOPT_H
1280 static struct option long_options[] =
1282 { "omitfirst", 0, 0, 'f' },
1283 { "recurse", 0, 0, 'r' },
1284 { "recurse:", 0, 0, 'R' },
1285 { "quiet", 0, 0, 'q' },
1286 { "sameline", 0, 0, '1' },
1287 { "size", 0, 0, 'S' },
1288 { "time", 0, 0, 't' },
1289 { "symlinks", 0, 0, 's' },
1290 { "hardlinks", 0, 0, 'H' },
1291 { "minsize", 1, 0, 'G' },
1292 { "maxsize", 1, 0, 'L' },
1293 { "noempty", 0, 0, 'n' },
1294 { "nohidden", 0, 0, 'A' },
1295 { "delete", 0, 0, 'd' },
1296 { "plain", 0, 0, 'P' },
1297 { "version", 0, 0, 'v' },
1298 { "help", 0, 0, 'h' },
1299 { "noprompt", 0, 0, 'N' },
1300 { "immediate", 0, 0, 'I'},
1301 { "summarize", 0, 0, 'm'},
1302 { "summary", 0, 0, 'm' },
1303 { "permissions", 0, 0, 'p' },
1304 { "order", 1, 0, 'o' },
1305 { "reverse", 0, 0, 'i' },
1306 { "log", 1, 0, 'l' },
1309 #define GETOPT getopt_long
1311 #define GETOPT getopt
1314 program_name = argv[0];
1316 setlocale(LC_CTYPE, "");
/* Keep a copy of the arguments as given; nonoptafter() compares it with
   argv after option parsing. */
1318 oldargv = cloneargs(argc, argv);
1320 while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:"
1321 #ifdef HAVE_GETOPT_H
1322 , long_options, NULL
1327 SETFLAG(flags, F_OMITFIRST);
1330 SETFLAG(flags, F_RECURSE);
1333 SETFLAG(flags, F_RECURSEAFTER);
1336 SETFLAG(flags, F_HIDEPROGRESS);
1339 SETFLAG(flags, F_DSAMELINE);
1342 SETFLAG(flags, F_SHOWSIZE);
1345 SETFLAG(flags, F_SHOWTIME);
1348 SETFLAG(flags, F_FOLLOWLINKS);
1351 SETFLAG(flags, F_CONSIDERHARDLINKS);
/* Size arguments must be complete, non-negative base-10 numbers. */
1354 minsize = strtoll(optarg, &endptr, 10);
1355 if (optarg[0] == '\0' || *endptr != '\0' || minsize < 0)
1357 errormsg("invalid value for --minsize: '%s'\n", optarg);
1362 maxsize = strtoll(optarg, &endptr, 10);
1363 if (optarg[0] == '\0' || *endptr != '\0' || maxsize < 0)
1365 errormsg("invalid value for --maxsize: '%s'\n", optarg);
1370 SETFLAG(flags, F_EXCLUDEEMPTY);
1373 SETFLAG(flags, F_EXCLUDEHIDDEN);
1376 SETFLAG(flags, F_DELETEFILES);
1379 SETFLAG(flags, F_PLAINPROMPT);
1382 printf("fdupes %s\n", VERSION);
1388 SETFLAG(flags, F_NOPROMPT);
1391 SETFLAG(flags, F_IMMEDIATE);
1394 SETFLAG(flags, F_SUMMARIZEMATCHES);
1397 SETFLAG(flags, F_PERMISSIONS);
/* Order names are matched case-insensitively. */
1400 if (!strcasecmp("name", optarg)) {
1401 ordertype = ORDER_NAME;
1402 } else if (!strcasecmp("time", optarg)) {
1403 ordertype = ORDER_MTIME;
1404 } else if (!strcasecmp("ctime", optarg)) {
1405 ordertype = ORDER_CTIME;
1407 errormsg("invalid value for --order: '%s'\n", optarg);
1412 SETFLAG(flags, F_REVERSE);
/* Open the log file named by the option argument and record its stat
   data, which grokdir() uses to keep the log out of the file list. */
1415 loginfo = log_open(logfile=optarg, &log_error);
1418 if (log_error == LOG_ERROR_NOT_A_LOG_FILE)
1419 errormsg("%s: doesn't look like an fdupes log file\n", logfile);
1421 errormsg("%s: could not open log file\n", logfile);
1427 if (stat(logfile, &logfile_status) != 0)
1429 errormsg("could not read log file status\n");
1435 fprintf(stderr, "Try `fdupes --help' for more information.\n");
/* Argument checks after option parsing. */
1440 if (optind >= argc) {
1441 errormsg("no directories specified\n");
1445 if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
1446 errormsg("options --recurse and --recurse: are not compatible\n");
1450 if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
1451 errormsg("options --summarize and --delete are not compatible\n");
/* With --recurse: / -R, find the first directory argument that follows
   the option; a result equal to argc means none was found. */
1455 if (ISFLAG(flags, F_RECURSEAFTER)) {
1456 firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
1458 if (firstrecurse == argc)
1459 firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind);
1461 if (firstrecurse == argc) {
1462 errormsg("-R option must be isolated from other options\n");
1466 /* F_RECURSE is not set for directories before --recurse: */
1467 for (x = optind; x < firstrecurse; x++)
1468 filecount += grokdir(argv[x], &files, &logfile_status);
1470 /* Set F_RECURSE for directories after --recurse: */
1471 SETFLAG(flags, F_RECURSE);
1473 for (x = firstrecurse; x < argc; x++)
1474 filecount += grokdir(argv[x], &files, &logfile_status);
1476 for (x = optind; x < argc; x++)
1477 filecount += grokdir(argv[x], &files, &logfile_status);
/* Blank out the progress line on stderr. */
1481 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
/* Matching pass: each file is either registered in the search tree or
   checked against it; a signature match is confirmed byte for byte
   before the pair is recorded or, with --delete --immediate, one of the
   two files is deleted. */
1489 registerfile(&checktree, curfile);
1491 match = checkmatch(&checktree, checktree, curfile);
1493 if (match != NULL) {
1494 file1 = fopen(curfile->d_name, "rb");
1496 curfile = curfile->next;
1500 file2 = fopen((*match)->d_name, "rb");
1503 curfile = curfile->next;
1507 if (confirmmatch(file1, file2)) {
1508 if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_IMMEDIATE))
1509 deletesuccessor(match, curfile,
1510 (ordertype == ORDER_MTIME ||
1511 ordertype == ORDER_CTIME) ? sort_pairs_by_time : sort_pairs_by_filename, loginfo );
1513 registerpair(match, curfile,
1514 (ordertype == ORDER_MTIME ||
1515 ordertype == ORDER_CTIME) ? sort_pairs_by_time : sort_pairs_by_filename );
1522 curfile = curfile->next;
1524 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1525 fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
1526 (int)((float) progress / (float) filecount * 100.0));
1531 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
/* Output stage: delete (without prompt, in screen mode, or with the
   plain prompt), summarize, or print the matches. */
1533 if (ISFLAG(flags, F_DELETEFILES))
1535 if (ISFLAG(flags, F_NOPROMPT))
1537 deletefiles(files, 0, 0, logfile);
1542 if (!ISFLAG(flags, F_PLAINPROMPT))
1544 if (newterm(getenv("TERM"), stdout, stdin) != 0)
1546 deletefiles_ncurses(files, logfile);
1550 errormsg("could not enter screen mode; falling back to plain mode\n\n");
1551 SETFLAG(flags, F_PLAINPROMPT);
/* The plain prompt reads answers from the terminal, so stdin is reopened
   on /dev/tty first. */
1555 if (ISFLAG(flags, F_PLAINPROMPT))
1557 if (freopen("/dev/tty", "r", stdin) == NULL)
1559 errormsg("could not open terminal for input\n");
1563 deletefiles(files, 1, stdin, logfile);
1566 if (freopen("/dev/tty", "r", stdin) == NULL)
1568 errormsg("could not open terminal for input\n");
1572 deletefiles(files, 1, stdin, logfile);
1579 if (ISFLAG(flags, F_SUMMARIZEMATCHES))
1580 summarizematches(files);
1584 printmatches(files);
/* Cleanup: file list entries, the cloned arguments, and the search tree. */
1587 curfile = files->next;
1588 free(files->d_name);
1589 free(files->crcsignature);
1590 free(files->crcpartial);
1595 for (x = 0; x < argc; x++)
1600 purgetree(checktree);