1 /* FDUPES Copyright (c) 1999-2018 Adrian Lopez
3 Permission is hereby granted, free of charge, to any person
4 obtaining a copy of this software and associated documentation files
5 (the "Software"), to deal in the Software without restriction,
6 including without limitation the rights to use, copy, modify, merge,
7 publish, distribute, sublicense, and/or sell copies of the Software,
8 and to permit persons to whom the Software is furnished to do so,
9 subject to the following conditions:
11 The above copyright notice and this permission notice shall be
12 included in all copies or substantial portions of the Software.
14 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15 OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16 MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
17 IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
18 CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
19 TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
20 SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. */
38 #ifdef HAVE_NCURSESW_CURSES_H
39 #include <ncursesw/curses.h>
45 #include "ncurses-interface.h"
/* Size bounds applied by grokdir() while building the file list.  A
   maxsize of -1 means "no upper bound" (grokdir only enforces maxsize when
   it differs from -1); a minsize of -1 never excludes anything. */
50 long long minsize = -1;
51 long long maxsize = -1;
/* Key used to order files inside a duplicate set; main() changes it when
   the --order option is given. */
61 ordertype_t ordertype = ORDER_MTIME;
/* Buffer size, in bytes, for the chunked reads in getcrcsignatureuntil()
   and confirmmatch(). */
63 #define CHUNK_SIZE 8192
/* Initial size of the prompt input buffer in deletefiles(), and the
   increment by which that buffer is grown for longer input lines. */
65 #define INPUT_SIZE 256
/* Read limit handed to getcrcsignatureuntil() by getcrcpartialsignature(). */
67 #define PARTIAL_MD5_SIZE 4096
/* Number of bytes in an MD5 digest. */
69 #define MD5_DIGEST_LENGTH 16
73 TODO: Partial sums (for working with very large files).
/* Holder for one digest. */
75 typedef struct _signature
78 md5_byte_t digest[16];
/* Holder for a dynamically allocated group of signature_t entries. */
81 typedef struct _signatures
84 signature_t *signatures;
/* Node of the binary tree that checkmatch() searches and registerfile()
   extends; left/right are the two subtrees. */
89 typedef struct _filetree {
91 struct _filetree *left;
92 struct _filetree *right;
/* Prefix every character of *filename_ptr that appears in escape_list with
   a backslash.

   escape_list  - NUL-terminated set of characters that need escaping
   filename_ptr - address of a heap-allocated, NUL-terminated string; when
                  at least one character is escaped the old string is freed
                  and *filename_ptr is pointed at the new, longer string

   The string is left untouched when nothing needs escaping.  Running out
   of memory is fatal: the error is reported and the program exits. */
void escapefilename(char *escape_list, char **filename_ptr)
{
  char *filename = *filename_ptr;
  size_t length = strlen(filename);   /* computed once, not per iteration */
  size_t x;
  size_t tx;
  char *tmp;
  char *shrunk;

  /* worst case: every character gains a backslash, plus the NUL */
  tmp = malloc(length * 2 + 1);
  if (tmp == NULL) {
    errormsg("out of memory!\n");
    exit(1);
  }

  for (x = 0, tx = 0; x < length; x++) {
    if (strchr(escape_list, filename[x]) != NULL) tmp[tx++] = '\\';
    tmp[tx++] = filename[x];
  }
  tmp[tx] = '\0';

  if (tx == x) {
    /* nothing was escaped; drop the scratch copy instead of leaking it */
    free(tmp);
    return;
  }

  /* hand the escaped copy to the caller, trimmed to its final length; if
     trimming fails the untrimmed buffer is still perfectly usable */
  shrunk = realloc(tmp, tx + 1);
  if (shrunk != NULL) tmp = shrunk;

  free(*filename_ptr);
  *filename_ptr = tmp;
}
/* stat()-based size lookup for the named file; yields -1 when stat()
   fails. */
127 off_t filesize(char *filename) {
130 if (stat(filename, &s) != 0) return -1;
/* stat()-based device lookup for the named file; yields 0 when stat()
   fails. */
135 dev_t getdevice(char *filename) {
138 if (stat(filename, &s) != 0) return 0;
/* stat()-based inode lookup for the named file; yields 0 when stat()
   fails. */
143 ino_t getinode(char *filename) {
146 if (stat(filename, &s) != 0) return 0;
/* stat()-based modification-time lookup for the named file; yields 0 when
   stat() fails. */
151 time_t getmtime(char *filename) {
154 if (stat(filename, &s) != 0) return 0;
/* stat()-based status-change-time lookup for the named file; yields 0 when
   stat() fails. */
159 time_t getctime(char *filename) {
162 if (stat(filename, &s) != 0) return 0;
/* Render the modification time of the named file as local time in the
   form "YYYY-MM-DD HH:MM", written into buf. */
167 char *fmtmtime(char *filename) {
/* getmtime() reports 0 when the file cannot be stat()ed */
169 time_t t = getmtime(filename);
/* NOTE(review): the localtime() result is passed to strftime() without a
   NULL check -- confirm this cannot fail for the values seen here */
171 strftime(buf, sizeof(buf), "%Y-%m-%d %H:%M", localtime(&t));
/* Duplicate an argument vector: allocates an array of argc pointers plus a
   separately allocated copy of each argv[x] string.  Allocation failures
   are reported through errormsg(). */
175 char **cloneargs(int argc, char **argv)
180 args = (char **) malloc(sizeof(char*) * argc);
182 errormsg("out of memory!\n");
186 for (x = 0; x < argc; x++) {
/* strlen + 1 leaves room for the terminating NUL copied by strcpy() */
187 args[x] = (char*) malloc(strlen(argv[x]) + 1);
188 if (args[x] == NULL) {
190 errormsg("out of memory!\n");
194 strcpy(args[x], argv[x]);
/* Search argv[start] .. argv[argc - 1] for an element that compares equal
   to arg under strcmp().  nonoptafter() uses the result as a position in
   the argument vector. */
200 int findarg(char *arg, int start, int argc, char **argv)
204 for (x = start; x < argc; x++)
205 if (strcmp(argv[x], arg) == 0)
211 /* Find the first non-option argument after specified option. */
/* option  - option text to locate in oldargv (searched from index 1)
   oldargv - argument vector in its original order
   newargv - argument vector whose entries from optind onward are the
             candidates examined
   optind  - index of the first candidate in newargv

   Yields the index x into newargv of the first candidate whose position in
   oldargv lies after the position of `option`.  main() treats a result
   equal to argc as "not found". */
212 int nonoptafter(char *option, int argc, char **oldargv,
213 char **newargv, int optind)
220 targetind = findarg(option, 1, argc, oldargv);
222 for (x = optind; x < argc; x++) {
/* each search resumes from startat, which tracks the previous hit */
223 testind = findarg(newargv[x], startat, argc, oldargv);
224 if (testind > targetind) return x;
225 else startat = testind;
/* Populate the size, inode, device and sorttime fields of `file` from the
   path stored in file->d_name. */
231 void getfilestats(file_t *file)
233 file->size = filesize(file->d_name);
234 file->inode = getinode(file->d_name);
235 file->device = getdevice(file->d_name);
/* sorttime comes from either the status-change time or the modification
   time; presumably chosen by the global ordertype -- confirm */
240 file->sorttime = getctime(file->d_name);
244 file->sorttime = getmtime(file->d_name);
/* Scan directory `dir` and push a file_t entry onto *filelistp for every
   entry that qualifies as a candidate file.

   dir            - path of the directory to read
   filelistp      - head of the linked list being built; updated in place
   logfile_status - stat data of the log file; an entry with the same
                    device and inode is kept out of the list

   Subdirectories are scanned recursively when F_RECURSE is set (symlinked
   ones only if F_FOLLOWLINKS is set as well) and the counts from those
   calls are added to this call's filecount. */
249 int grokdir(char *dir, file_t **filelistp, struct stat *logfile_status)
253 struct dirent *dirinfo;
/* spinner state is static, so it carries over between calls */
258 static int progress = 0;
259 static char indicator[] = "-\\|/";
260 char *fullname, *name;
266 errormsg("could not chdir to %s\n", dir);
270 while ((dirinfo = readdir(cd)) != NULL) {
/* skip the "." and ".." entries */
271 if (strcmp(dirinfo->d_name, ".") && strcmp(dirinfo->d_name, "..")) {
272 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
273 fprintf(stderr, "\rBuilding file list %c ", indicator[progress]);
274 progress = (progress + 1) % 4;
277 newfile = (file_t*) malloc(sizeof(file_t));
280 errormsg("out of memory!\n");
283 } else newfile->next = *filelistp;
287 newfile->crcsignature = NULL;
288 newfile->crcpartial = NULL;
289 newfile->duplicates = NULL;
290 newfile->hasdupes = 0;
/* room for dir, an optional '/' separator, the entry name and the NUL */
292 newfile->d_name = (char*)malloc(strlen(dir)+strlen(dirinfo->d_name)+2);
294 if (!newfile->d_name) {
295 errormsg("out of memory!\n");
301 strcpy(newfile->d_name, dir);
302 lastchar = strlen(dir) - 1;
/* add a separator unless dir already ends in one */
303 if (lastchar >= 0 && dir[lastchar] != '/')
304 strcat(newfile->d_name, "/");
305 strcat(newfile->d_name, dirinfo->d_name);
/* with F_EXCLUDEHIDDEN, entries whose basename starts with '.' are
   discarded; basename() is run on a copy of the path */
307 if (ISFLAG(flags, F_EXCLUDEHIDDEN)) {
308 fullname = strdup(newfile->d_name);
311 errormsg("out of memory!\n");
316 name = basename(fullname);
317 if (name[0] == '.' && strcmp(name, ".") && strcmp(name, "..") ) {
318 free(newfile->d_name);
/* entries that cannot be stat()ed are discarded */
325 if (stat(newfile->d_name, &info) == -1) {
326 free(newfile->d_name);
/* NOTE(review): filesize() performs a second stat() on the same path even
   though `info` was just filled in above */
331 size = filesize(newfile->d_name);
/* empty-file and min/max size filters; directories are exempt */
332 if (!S_ISDIR(info.st_mode) && (((size == 0 && ISFLAG(flags, F_EXCLUDEEMPTY)) || size < minsize || (size > maxsize && maxsize != -1)))) {
333 free(newfile->d_name);
/* the log file itself is never a candidate */
338 if (info.st_dev == logfile_status->st_dev && info.st_ino == logfile_status->st_ino)
340 free(newfile->d_name);
/* lstat() describes the entry itself, which reveals symlinks */
345 if (lstat(newfile->d_name, &linfo) == -1) {
346 free(newfile->d_name);
351 if (S_ISDIR(info.st_mode)) {
352 if (ISFLAG(flags, F_RECURSE) && (ISFLAG(flags, F_FOLLOWLINKS) || !S_ISLNK(linfo.st_mode)))
353 filecount += grokdir(newfile->d_name, filelistp, logfile_status);
354 free(newfile->d_name);
/* keep regular files, plus symlinks when links are being followed */
357 if (S_ISREG(linfo.st_mode) || (S_ISLNK(linfo.st_mode) && ISFLAG(flags, F_FOLLOWLINKS))) {
358 getfilestats(newfile);
359 *filelistp = newfile;
362 free(newfile->d_name);
/* Compute an MD5 digest over the contents of `filename`, reading the file
   in pieces of at most CHUNK_SIZE bytes.

   filename - path of the file to hash
   max_read - 0 to hash the whole file; otherwise files larger than
              max_read are treated specially (presumably only the first
              max_read bytes are hashed -- confirm)

   The digest is written to a static buffer, so every call overwrites the
   result of the previous one; checkmatch() copies it with md5copy() and
   treats a NULL return as failure. */
374 md5_byte_t *getcrcsignatureuntil(char *filename, off_t max_read)
379 static md5_byte_t digest[MD5_DIGEST_LENGTH];
380 static md5_byte_t chunk[CHUNK_SIZE];
386 fsize = filesize(filename);
388 if (max_read != 0 && fsize > max_read)
391 file = fopen(filename, "rb");
393 errormsg("error opening file %s\n", filename);
/* read a full chunk, or whatever remains when less than a chunk is left */
398 toread = (fsize >= CHUNK_SIZE) ? CHUNK_SIZE : fsize;
/* exactly toread bytes are expected; anything less counts as an error */
399 if (fread(chunk, toread, 1, file) != 1) {
400 errormsg("error reading from file %s\n", filename);
404 md5_append(&state, chunk, toread);
408 md5_finish(&state, digest);
/* Signature of `filename` with no read limit (max_read of 0). */
415 md5_byte_t *getcrcsignature(char *filename)
417 return getcrcsignatureuntil(filename, 0);
/* Signature of `filename` with the read limit set to PARTIAL_MD5_SIZE;
   checkmatch() uses it as a cheap first test before the full signature. */
420 md5_byte_t *getcrcpartialsignature(char *filename)
422 return getcrcsignatureuntil(filename, PARTIAL_MD5_SIZE);
/* Compare two digests byte by byte over MD5_DIGEST_LENGTH bytes.
   checkmatch() reads the result as an ordering: negative, zero (equal) or
   positive. */
425 int md5cmp(const md5_byte_t *a, const md5_byte_t *b)
429 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
433 else if (a[x] > b[x])
/* Copy a digest of MD5_DIGEST_LENGTH bytes from `from` into `to`; `to`
   must have room for that many bytes. */
440 void md5copy(md5_byte_t *to, const md5_byte_t *from)
444 for (x = 0; x < MD5_DIGEST_LENGTH; ++x)
/* Recursively visit the left and right subtrees of checktree; main() calls
   this during final cleanup (presumably each node is released after its
   children -- confirm).  checktree itself must not be NULL. */
448 void purgetree(filetree_t *checktree)
450 if (checktree->left != NULL) purgetree(checktree->left);
452 if (checktree->right != NULL) purgetree(checktree->right);
/* Allocate a new tree node at *branch that refers to `file` and has no
   children.  Allocation failure is reported through errormsg(). */
457 int registerfile(filetree_t **branch, file_t *file)
459 *branch = (filetree_t*) malloc(sizeof(filetree_t));
460 if (*branch == NULL) {
461 errormsg("out of memory!\n");
465 (*branch)->file = file;
466 (*branch)->left = NULL;
467 (*branch)->right = NULL;
/* Report whether two files share mode bits, owner and group.
   Returns 1 when st_mode, st_uid and st_gid all match, 0 when any of them
   differs, and -1 when either file cannot be stat()ed.
   NOTE(review): checkmatch() tests this with `!same_permissions(...)`, so
   the -1 error result is handled like "same" there -- confirm intended. */
472 int same_permissions(char* name1, char* name2)
476 if (stat(name1, &s1) != 0) return -1;
477 if (stat(name2, &s2) != 0) return -1;
479 return (s1.st_mode == s2.st_mode &&
480 s1.st_uid == s2.st_uid &&
481 s1.st_gid == s2.st_gid);
/* Check whether `file` has the same inode and device as the file held by
   tree node `checktree`, or as any entry on that file's duplicates
   chain. */
484 int is_hardlink(filetree_t *checktree, file_t *file)
488 if ((file->inode == checktree->file->inode) &&
489 (file->device == checktree->file->device))
/* the chain is only walked when the node's file is flagged as having
   duplicates */
492 if (checktree->file->hasdupes)
494 dupe = checktree->file->duplicates;
497 if ((file->inode == dupe->inode) &&
498 (file->device == dupe->device))
501 dupe = dupe->duplicates;
502 } while (dupe != NULL);
508 /* check whether two paths represent the same file (deleting one would delete the other) */
/* The test proceeds in stages: same device and inode, then same basename,
   then same containing directory (compared by the directory's own device
   and inode).  Private copies of both paths are made because basename()
   and dirname() may modify their argument. */
509 int is_same_file(file_t *file_a, file_t *file_b)
517 struct stat dirstat_a;
518 struct stat dirstat_b;
520 /* if files on different devices and/or different inodes, they are not the same file */
521 if (file_a->device != file_b->device || file_a->inode != file_b->inode)
524 /* copy filenames (basename and dirname may modify these) */
525 filename_a = strdup(file_a->d_name);
529 filename_b = strdup(file_b->d_name);
533 /* get file basenames */
534 basename_a = basename(filename_a);
/* memmove, not strcpy: the basename may point into the same buffer */
535 memmove(filename_a, basename_a, strlen(basename_a) + 1);
537 basename_b = basename(filename_b);
538 memmove(filename_b, basename_b, strlen(basename_b) + 1);
540 /* if files have different names, they are not the same file */
541 if (strcmp(filename_a, filename_b) != 0)
/* restore the full paths into the scratch copies before taking dirname() */
549 strcpy(filename_a, file_a->d_name);
550 strcpy(filename_b, file_b->d_name);
552 /* get directory names */
553 dirname_a = dirname(filename_a);
554 if (stat(dirname_a, &dirstat_a) != 0)
561 dirname_b = dirname(filename_b);
562 if (stat(dirname_b, &dirstat_b) != 0)
572 /* if directories on which files reside are different, they are not the same file */
573 if (dirstat_a.st_dev != dirstat_b.st_dev || dirstat_a.st_ino != dirstat_b.st_ino)
576 /* same device, inode, filename, and directory; therefore, same file */
580 /* check whether given tree node already contains a copy of given file */
/* Applies is_same_file() to the node's own file and then, when that file
   is flagged as having duplicates, to every entry on its duplicates
   chain. */
581 int has_same_file(filetree_t *checktree, file_t *file)
585 if (is_same_file(checktree->file, file))
588 if (checktree->file->hasdupes)
590 dupe = checktree->file->duplicates;
593 if (is_same_file(dupe, file))
596 dupe = dupe->duplicates;
597 } while (dupe != NULL);
/* Look for a file in the tree rooted at `checktree` whose signature
   matches that of `file`, inserting `file` into the tree when the search
   reaches an empty branch.

   root      - address of the tree root (passed through on recursion)
   checktree - node to compare against in this step
   file      - candidate file

   Nodes are ordered by file size first, then by partial signature and,
   when the partial signatures agree, by full signature.  Signatures are
   computed on demand and cached in the file_t (crcpartial, crcsignature).
   When every comparison ties, the address of the node's file pointer is
   returned so the caller can confirm the match and link the pair.  NULL is
   returned when a signature cannot be obtained. */
603 file_t **checkmatch(filetree_t **root, filetree_t *checktree, file_t *file)
606 md5_byte_t *crcsignature;
609 if (ISFLAG(flags, F_CONSIDERHARDLINKS))
611 /* If node already contains file, we don't want to add it again.
613 if (has_same_file(checktree, file))
618 /* If device and inode fields are equal one of the files is a
619 hard link to the other or the files have been listed twice
620 unintentionally. We don't want to flag these files as
621 duplicates unless the user specifies otherwise.
623 if (is_hardlink(checktree, file))
627 fsize = filesize(file->d_name);
629 if (fsize < checktree->file->size)
632 if (fsize > checktree->file->size) cmpresult = 1;
634 if (ISFLAG(flags, F_PERMISSIONS) &&
635 !same_permissions(file->d_name, checktree->file->d_name))
638 if (checktree->file->crcpartial == NULL) {
639 crcsignature = getcrcpartialsignature(checktree->file->d_name);
640 if (crcsignature == NULL) {
641 errormsg ("cannot read file %s\n", checktree->file->d_name);
645 checktree->file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
646 if (checktree->file->crcpartial == NULL) {
647 errormsg("out of memory\n");
650 md5copy(checktree->file->crcpartial, crcsignature);
653 if (file->crcpartial == NULL) {
654 crcsignature = getcrcpartialsignature(file->d_name);
655 if (crcsignature == NULL) {
656 errormsg ("cannot read file %s\n", file->d_name);
660 file->crcpartial = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
661 if (file->crcpartial == NULL) {
662 errormsg("out of memory\n");
665 md5copy(file->crcpartial, crcsignature);
668 cmpresult = md5cmp(file->crcpartial, checktree->file->crcpartial);
669 /*if (cmpresult != 0) errormsg(" on %s vs %s\n", file->d_name, checktree->file->d_name);*/
/* partial signatures agree: compare the full-file signatures, computing
   and caching whichever of the two is still missing */
671 if (cmpresult == 0) {
672 if (checktree->file->crcsignature == NULL) {
673 crcsignature = getcrcsignature(checktree->file->d_name);
674 if (crcsignature == NULL) return NULL;
676 checktree->file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
677 if (checktree->file->crcsignature == NULL) {
678 errormsg("out of memory\n");
/* the signature helper returns a static buffer, hence the private copy */
681 md5copy(checktree->file->crcsignature, crcsignature);
684 if (file->crcsignature == NULL) {
685 crcsignature = getcrcsignature(file->d_name);
686 if (crcsignature == NULL) return NULL;
688 file->crcsignature = (md5_byte_t*) malloc(MD5_DIGEST_LENGTH * sizeof(md5_byte_t));
689 if (file->crcsignature == NULL) {
690 errormsg("out of memory\n");
693 md5copy(file->crcsignature, crcsignature);
696 cmpresult = md5cmp(file->crcsignature, checktree->file->crcsignature);
697 /*if (cmpresult != 0) errormsg("P on %s vs %s\n",
698 file->d_name, checktree->file->d_name);
699 else errormsg("P F on %s vs %s\n", file->d_name,
700 checktree->file->d_name);
701 printf("%s matches %s\n", file->d_name, checktree->file->d_name);*/
/* continue in the left subtree, or attach the file there if it is empty */
706 if (checktree->left != NULL) {
707 return checkmatch(root, checktree->left, file);
709 registerfile(&(checktree->left), file);
/* likewise for the right subtree when the candidate sorts after the node */
712 } else if (cmpresult > 0) {
713 if (checktree->right != NULL) {
714 return checkmatch(root, checktree->right, file);
716 registerfile(&(checktree->right), file);
/* all comparisons tied: report the node's file as the match */
721 return &checktree->file;
725 /* Do a bit-for-bit comparison in case two different files produce the
726 same signature. Unlikely, but better safe than sorry. */
728 int confirmmatch(FILE *file1, FILE *file2)
730 unsigned char c1[CHUNK_SIZE];
731 unsigned char c2[CHUNK_SIZE];
735 fseek(file1, 0, SEEK_SET);
736 fseek(file2, 0, SEEK_SET);
739 r1 = fread(c1, sizeof(unsigned char), sizeof(c1), file1);
740 r2 = fread(c2, sizeof(unsigned char), sizeof(c2), file2);
742 if (r1 != r2) return 0; /* file lengths are different */
743 if (memcmp (c1, c2, r1)) return 0; /* file contents are different */
/* Print a one-line summary of the duplicate sets reachable from `files`:
   number of duplicate files, number of sets and the space they occupy,
   or "No duplicates found." */
749 void summarizematches(file_t *files)
752 double numbytes = 0.0;
756 while (files != NULL)
762 tmpfile = files->duplicates;
763 while (tmpfile != NULL)
/* every file on the chain is charged the size of the set's first file */
766 numbytes += files->size;
767 tmpfile = tmpfile->duplicates;
775 printf("No duplicates found.\n\n");
/* NOTE(review): the switch away from plain bytes happens at 1024, yet the
   larger units divide by 1000, so totals of 1000..1023 print as bytes */
778 if (numbytes < 1024.0)
779 printf("%d duplicate files (in %d sets), occupying %.0f bytes.\n\n", numfiles, numsets, numbytes);
780 else if (numbytes <= (1000.0 * 1000.0))
781 printf("%d duplicate files (in %d sets), occupying %.1f kilobytes\n\n", numfiles, numsets, numbytes / 1000.0);
783 printf("%d duplicate files (in %d sets), occupying %.1f megabytes\n\n", numfiles, numsets, numbytes / (1000.0 * 1000.0));
/* Print every duplicate set reachable from `files` to stdout.
   Flags consulted: F_OMITFIRST (leave out the first file of each set),
   F_SHOWSIZE (size heading), F_SHOWTIME (prefix each name with its
   modification time) and F_DSAMELINE (names separated by spaces on one
   line instead of one name per line).
   With F_DSAMELINE the stored d_name strings are rewritten in place with
   backslashes and spaces escaped. */
788 void printmatches(file_t *files)
792 while (files != NULL) {
793 if (files->hasdupes) {
794 if (!ISFLAG(flags, F_OMITFIRST)) {
795 if (ISFLAG(flags, F_SHOWSIZE)) printf("%lld byte%seach:\n", (long long int)files->size,
796 (files->size != 1) ? "s " : " ");
797 if (ISFLAG(flags, F_SHOWTIME))
798 printf("%s ", fmtmtime(files->d_name));
799 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &files->d_name);
800 printf("%s%c", files->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
/* remaining members of the set hang off the duplicates chain */
802 tmpfile = files->duplicates;
803 while (tmpfile != NULL) {
804 if (ISFLAG(flags, F_SHOWTIME))
805 printf("%s ", fmtmtime(tmpfile->d_name));
806 if (ISFLAG(flags, F_DSAMELINE)) escapefilename("\\ ", &tmpfile->d_name);
807 printf("%s%c", tmpfile->d_name, ISFLAG(flags, F_DSAMELINE)?' ':'\n');
808 tmpfile = tmpfile->duplicates;
#define REVISE_APPEND "_tmp"
/* Build an alternative name for `path` by inserting REVISE_APPEND and the
   sequence number in front of the file extension, or at the end of the
   name when there is no extension:

     revisefilename("file.txt", 3)   -> "file_tmp3.txt"
     revisefilename("noext", 12)     -> "noext_tmp12"
     revisefilename("dir.d/file", 1) -> "dir.d/file_tmp1"

   path - NUL-terminated path; not modified
   seq  - sequence number to embed

   Only a '.' in the final path component counts as an extension separator,
   so a dot in a directory name is never split.  Returns a newly allocated
   string that the caller must free(), or NULL if memory cannot be
   obtained. */
char *revisefilename(char *path, int seq)
{
  const char *slash;
  const char *dot;
  char *newpath;
  size_t size;
  int digits;

  /* let snprintf() measure the number, sign included */
  digits = snprintf(NULL, 0, "%d", seq);
  if (digits < 0) return NULL;

  /* the '.' written in the extension case is already counted in
     strlen(path), so both layouts fit in the same size */
  size = strlen(path) + strlen(REVISE_APPEND) + (size_t) digits + 1;
  newpath = malloc(size);
  if (!newpath) return NULL;

  slash = strrchr(path, '/');
  dot = strrchr(path, '.');
  if (dot != NULL && slash != NULL && dot < slash)
    dot = NULL; /* the dot belongs to a directory, not to the file name */

  if (dot != NULL)
    snprintf(newpath, size, "%.*s%s%d.%s", (int) (dot - path), path,
             REVISE_APPEND, seq, dot + 1);
  else
    snprintf(newpath, size, "%s%s%d", path, REVISE_APPEND, seq);

  return newpath;
}
/* Create `newfile` as a hard link to `oldfile` and verify the result.
   The device and inode of oldfile are recorded before link() is called and
   compared with those of newfile afterwards; 0 is returned when they do
   not match. */
852 int relink(char *oldfile, char *newfile)
859 od = getdevice(oldfile);
860 oi = getinode(oldfile);
862 if (link(oldfile, newfile) != 0)
865 /* make sure we're working with the right file (the one we created) */
866 nd = getdevice(newfile);
867 ni = getinode(newfile);
869 if (nd != od || oi != ni)
870 return 0; /* file is not what we expected */
/* Go through every duplicate set in `files`, decide which members to
   preserve, and delete the rest with remove().

   files   - head of the file list; sets are the entries flagged hasdupes
             together with their duplicates chains
   prompt  - non-zero: list each set and ask which files to preserve;
             zero: preserve only the first file of each set
   tty     - stream the answers are read from (used only when prompting)
   logfile - name handed to log_open(); kept and deleted files are
             recorded through the log_* calls

   Output uses "[+]" for a preserved file, "[-]" for a deleted one and
   "[!]" for a file that could not be deleted. */
875 void deletefiles(file_t *files, int prompt, FILE *tty, char *logfile)
892 struct log_info *loginfo;
/* first pass: find the largest set so the work arrays can be sized */
898 if (curfile->hasdupes) {
902 tmpfile = curfile->duplicates;
905 tmpfile = tmpfile->duplicates;
908 if (counter > max) max = counter;
911 curfile = curfile->next;
916 dupelist = (file_t**) malloc(sizeof(file_t*) * max);
917 preserve = (int*) malloc(sizeof(int) * max);
918 preservestr = (char*) malloc(INPUT_SIZE);
920 if (!dupelist || !preserve || !preservestr) {
921 errormsg("out of memory\n");
927 loginfo = log_open(logfile, &log_error);
929 register_sigint_handler();
/* second pass: handle one set at a time; dupelist[] and preserve[] are
   used from index 1 up to counter */
932 if (files->hasdupes) {
935 dupelist[counter] = files;
939 if (ISFLAG(flags, F_SHOWTIME))
940 printf("[%d] [%s] %s\n", counter, fmtmtime(files->d_name), files->d_name);
942 printf("[%d] %s\n", counter, files->d_name);
945 tmpfile = files->duplicates;
948 dupelist[++counter] = tmpfile;
951 if (ISFLAG(flags, F_SHOWTIME))
952 printf("[%d] [%s] %s\n", counter, fmtmtime(tmpfile->d_name), tmpfile->d_name);
954 printf("[%d] %s\n", counter, tmpfile->d_name);
956 tmpfile = tmpfile->duplicates;
959 if (prompt) printf("\n");
961 if (!prompt) /* preserve only the first file */
964 for (x = 2; x <= counter; x++) preserve[x] = 0;
967 else /* prompt for files to preserve */
970 printf("Set %d of %d, preserve files [1 - %d, all, quit]",
971 curgroup, groups, counter);
972 if (ISFLAG(flags, F_SHOWSIZE)) printf(" (%lld byte%seach)", (long long int)files->size,
973 (files->size != 1) ? "s " : " ");
977 if (!fgets(preservestr, INPUT_SIZE, tty))
979 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
980 preservestr[1] = '\0';
/* a line longer than the buffer is read in further INPUT_SIZE pieces,
   growing the buffer each time, until it ends in a newline */
997 i = strlen(preservestr) - 1;
999 while (preservestr[i]!='\n'){ /* tail of buffer must be a newline */
1001 realloc(preservestr, strlen(preservestr) + 1 + INPUT_SIZE);
1002 if (!tstr) { /* couldn't allocate memory, treat as fatal */
1003 errormsg("out of memory!\n");
1008 if (!fgets(preservestr + i + 1, INPUT_SIZE, tty))
1010 preservestr[0] = '\n'; /* treat fgets() failure as if nothing was entered */
1011 preservestr[1] = '\0';
1014 i = strlen(preservestr)-1;
1017 if (strcmp(preservestr, "q\n") == 0 || strcmp(preservestr, "quit\n") == 0)
/* start from "delete everything", then mark each file the user named */
1031 for (x = 1; x <= counter; x++) preserve[x] = 0;
/* answers are separated by spaces, commas or newlines */
1033 token = strtok(preservestr, " ,\n");
1035 while (token != NULL) {
1036 if (strcasecmp(token, "all") == 0 || strcasecmp(token, "a") == 0)
/* NOTE(review): this loop starts at index 0 while every other loop over
   preserve[] starts at 1 */
1037 for (x = 0; x <= counter; x++) preserve[x] = 1;
/* NOTE(review): the sscanf() result is not checked here */
1040 sscanf(token, "%d", &number);
1041 if (number > 0 && number <= counter) preserve[number] = 1;
1043 token = strtok(NULL, " ,\n");
1046 for (sum = 0, x = 1; x <= counter; x++) sum += preserve[x];
1047 } while (sum < 1); /* make sure we've preserved at least one file */
1052 log_begin_set(loginfo);
/* act on the decisions: report kept files, remove the others */
1054 for (x = 1; x <= counter; x++) {
1057 printf(" [+] %s\n", dupelist[x]->d_name);
1060 log_file_remaining(loginfo, dupelist[x]->d_name);
1063 if (remove(dupelist[x]->d_name) == 0) {
1064 printf(" [-] %s\n", dupelist[x]->d_name);
1067 log_file_deleted(loginfo, dupelist[x]->d_name);
/* a file that could not be removed is logged as still present */
1069 printf(" [!] %s ", dupelist[x]->d_name);
1070 printf("-- unable to delete file!\n");
1073 log_file_remaining(loginfo, dupelist[x]->d_name);
1080 log_end_set(loginfo);
1083 files = files->next;
/* Comparison callback that orders by arrival instead of by a file
   attribute: the result depends only on whether f2 already has a successor
   on its duplicates chain (1 if so, -1 otherwise), with the sign inverted
   under F_REVERSE. */
1094 int sort_pairs_by_arrival(file_t *f1, file_t *f2)
1096 if (f2->duplicates != 0)
1097 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
1099 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
/* Comparison callback on the sorttime field: -1 when f1 is older than f2,
   1 when it is newer, with the sign inverted under F_REVERSE. */
1102 int sort_pairs_by_time(file_t *f1, file_t *f2)
1104 if (f1->sorttime < f2->sorttime)
1105 return !ISFLAG(flags, F_REVERSE) ? -1 : 1;
1106 else if (f1->sorttime > f2->sorttime)
1107 return !ISFLAG(flags, F_REVERSE) ? 1 : -1;
/* Comparison callback on the d_name field using strcmp(); the result is
   negated under F_REVERSE. */
1112 int sort_pairs_by_filename(file_t *f1, file_t *f2)
1114 int strvalue = strcmp(f1->d_name, f2->d_name);
1115 return !ISFLAG(flags, F_REVERSE) ? strvalue : -strvalue;
/* Insert `newmatch` into the duplicates chain headed by *matchlist, at the
   position chosen by comparef.

   matchlist - address of the pointer to the first file of the set; it is
               updated when newmatch becomes the new head
   newmatch  - file to add to the set
   comparef  - ordering callback; newmatch goes in front of the first chain
               entry for which comparef(newmatch, entry) <= 0, or at the
               end when there is none

   hasdupes is kept set on the head of the chain. */
1118 void registerpair(file_t **matchlist, file_t *newmatch,
1119 int (*comparef)(file_t *f1, file_t *f2))
1124 (*matchlist)->hasdupes = 1;
1127 traverse = *matchlist;
1130 if (comparef(newmatch, traverse) <= 0)
1132 newmatch->duplicates = traverse;
1136 *matchlist = newmatch; /* update pointer to head of list */
1138 newmatch->hasdupes = 1;
1139 traverse->hasdupes = 0; /* flag is only for first file in dupe chain */
/* insertion in the middle: the predecessor now points at newmatch */
1142 back->duplicates = newmatch;
/* reached the tail without finding a spot: append */
1148 if (traverse->duplicates == 0)
1150 traverse->duplicates = newmatch;
1153 traverse->hasdupes = 1;
1160 traverse = traverse->duplicates;
/* Immediate-mode deletion: of the two matching files, keep the one that
   sorts first under comparef and remove() the other.

   existing  - address of the pointer to the file already recorded; it is
               repointed at `duplicate` when `duplicate` is the one kept
   duplicate - newly found file with identical contents
   comparef  - ordering callback; a result >= 0 for (duplicate, *existing)
               keeps *existing
   loginfo   - log handle passed to the log_* calls

   Output uses "[+]" for the kept file, "[-]" for the deleted one and
   "[!]" when deletion fails. */
1164 void deletesuccessor(file_t **existing, file_t *duplicate,
1165 int (*comparef)(file_t *f1, file_t *f2), struct log_info *loginfo)
1170 if (comparef(duplicate, *existing) >= 0)
1172 to_keep = *existing;
1173 to_delete = duplicate;
1177 to_keep = duplicate;
1178 to_delete = *existing;
1180 *existing = duplicate;
/* blank out the progress line before printing results */
1183 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
1185 printf(" [+] %s\n", to_keep->d_name);
1188 log_file_remaining(loginfo, to_keep->d_name);
1190 if (remove(to_delete->d_name) == 0) {
1191 printf(" [-] %s\n", to_delete->d_name);
1194 log_file_deleted(loginfo, to_delete->d_name);
/* a file that could not be removed is logged as still present */
1196 printf(" [!] %s ", to_delete->d_name);
1197 printf("-- unable to delete file!\n");
1200 log_file_remaining(loginfo, to_delete->d_name);
/* Usage summary: one printf() call per line of output, listing each short
   option, its long form and a description. */
1208 printf("Usage: fdupes [options] DIRECTORY...\n\n");
1210 printf(" -r --recurse \tfor every directory given follow subdirectories\n");
1211 printf(" \tencountered within\n");
1212 printf(" -R --recurse: \tfor each directory given after this option follow\n");
1213 printf(" \tsubdirectories encountered within (note the ':' at\n");
1214 printf(" \tthe end of the option, manpage for more details)\n");
1215 printf(" -s --symlinks \tfollow symlinks\n");
1216 printf(" -H --hardlinks \tnormally, when two or more files point to the same\n");
1217 printf(" \tdisk area they are treated as non-duplicates; this\n");
1218 printf(" \toption will change this behavior\n");
1219 printf(" -G --minsize=SIZE\tconsider only files greater than or equal to SIZE\n");
1220 printf(" -L --maxsize=SIZE\tconsider only files less than or equal to SIZE\n");
1221 printf(" -n --noempty \texclude zero-length files from consideration\n");
1222 printf(" -A --nohidden \texclude hidden files from consideration\n");
1223 printf(" -f --omitfirst \tomit the first file in each set of matches\n");
1224 printf(" -1 --sameline \tlist each set of matches on a single line\n");
1225 printf(" -S --size \tshow size of duplicate files\n");
1226 printf(" -t --time \tshow modification time of duplicate files\n");
1227 printf(" -m --summarize \tsummarize dupe information\n");
1228 printf(" -q --quiet \thide progress indicator\n");
1229 printf(" -d --delete \tprompt user for files to preserve and delete all\n");
1230 printf(" \tothers; important: under particular circumstances,\n");
1231 printf(" \tdata may be lost when using this option together\n");
1232 printf(" \twith -s or --symlinks, or when specifying a\n");
1233 printf(" \tparticular directory more than once; refer to the\n");
1234 printf(" \tfdupes documentation for additional information\n");
1236 printf(" -P --plain \twith --delete, use line-based prompt (as with older\n");
1237 printf(" \tversions of fdupes) instead of screen-mode interface\n");
1239 printf(" -N --noprompt \ttogether with --delete, preserve the first file in\n");
1240 printf(" \teach set of duplicates and delete the rest without\n");
1241 printf(" \tprompting the user\n");
1242 printf(" -I --immediate \tdelete duplicates as they are encountered, without\n");
1243 printf(" \tgrouping into sets; implies --noprompt\n");
1244 printf(" -p --permissions \tdon't consider files with different owner/group or\n");
1245 printf(" \tpermission bits as duplicates\n");
1246 printf(" -o --order=BY \tselect sort order for output and deleting; by file\n");
1247 printf(" \tmodification time (BY='time'; default), status\n");
1248 printf(" \tchange time (BY='ctime'), or filename (BY='name')\n");
1249 printf(" -i --reverse \treverse order while sorting\n");
1250 printf(" -l --log=LOGFILE \tlog file deletion choices to LOGFILE\n");
1251 printf(" -v --version \tdisplay fdupes version\n");
1252 printf(" -h --help \tdisplay this help message\n\n");
/* builds without HAVE_GETOPT_H say so, since only the short forms work */
1253 #ifndef HAVE_GETOPT_H
1254 printf("Note: Long options are not supported in this fdupes build.\n\n");
/* Program entry point.  In order: parse the command-line options into the
   global flags, build the file list from the directory arguments with
   grokdir(), match every file against the signature tree with
   checkmatch(), confirm candidates byte by byte with confirmmatch(), and
   finally delete, summarize or print the duplicate sets according to the
   selected options before releasing the list and the tree. */
1258 int main(int argc, char **argv) {
1263 file_t *files = NULL;
1265 file_t **match = NULL;
1266 filetree_t *checktree = NULL;
/* NOTE(review): loginfo has no initializer here and, in the code visible
   below, is assigned only in the --log case, yet it is passed to
   deletesuccessor() later -- confirm it is always valid at that point */
1272 struct log_info *loginfo;
/* NOTE(review): logfile_status is filled by stat() only in the --log case
   visible below but is handed to every grokdir() call -- confirm it is
   initialized when no log file is given */
1274 struct stat logfile_status;
/* long option table; each entry maps to the short option letter handled
   in the parsing loop */
1277 #ifdef HAVE_GETOPT_H
1278 static struct option long_options[] =
1280 { "omitfirst", 0, 0, 'f' },
1281 { "recurse", 0, 0, 'r' },
1282 { "recurse:", 0, 0, 'R' },
1283 { "quiet", 0, 0, 'q' },
1284 { "sameline", 0, 0, '1' },
1285 { "size", 0, 0, 'S' },
1286 { "time", 0, 0, 't' },
1287 { "symlinks", 0, 0, 's' },
1288 { "hardlinks", 0, 0, 'H' },
1289 { "minsize", 1, 0, 'G' },
1290 { "maxsize", 1, 0, 'L' },
1291 { "noempty", 0, 0, 'n' },
1292 { "nohidden", 0, 0, 'A' },
1293 { "delete", 0, 0, 'd' },
1294 { "plain", 0, 0, 'P' },
1295 { "version", 0, 0, 'v' },
1296 { "help", 0, 0, 'h' },
1297 { "noprompt", 0, 0, 'N' },
1298 { "immediate", 0, 0, 'I'},
1299 { "summarize", 0, 0, 'm'},
1300 { "summary", 0, 0, 'm' },
1301 { "permissions", 0, 0, 'p' },
1302 { "order", 1, 0, 'o' },
1303 { "reverse", 0, 0, 'i' },
1304 { "log", 1, 0, 'l' },
1307 #define GETOPT getopt_long
1309 #define GETOPT getopt
1312 program_name = argv[0];
1314 setlocale(LC_CTYPE, "");
/* untouched copy of the arguments, later handed to nonoptafter() together
   with argv */
1316 oldargv = cloneargs(argc, argv);
1318 while ((opt = GETOPT(argc, argv, "frRq1StsHG:L:nAdPvhNImpo:il:"
1319 #ifdef HAVE_GETOPT_H
1320 , long_options, NULL
1325 SETFLAG(flags, F_OMITFIRST);
1328 SETFLAG(flags, F_RECURSE);
1331 SETFLAG(flags, F_RECURSEAFTER);
1334 SETFLAG(flags, F_HIDEPROGRESS);
1337 SETFLAG(flags, F_DSAMELINE);
1340 SETFLAG(flags, F_SHOWSIZE);
1343 SETFLAG(flags, F_SHOWTIME);
1346 SETFLAG(flags, F_FOLLOWLINKS);
1349 SETFLAG(flags, F_CONSIDERHARDLINKS);
/* size limits must be complete, non-negative decimal numbers */
1352 minsize = strtoll(optarg, &endptr, 10);
1353 if (optarg[0] == '\0' || *endptr != '\0' || minsize < 0)
1355 errormsg("invalid value for --minsize: '%s'\n", optarg);
1360 maxsize = strtoll(optarg, &endptr, 10);
1361 if (optarg[0] == '\0' || *endptr != '\0' || maxsize < 0)
1363 errormsg("invalid value for --maxsize: '%s'\n", optarg);
1368 SETFLAG(flags, F_EXCLUDEEMPTY);
1371 SETFLAG(flags, F_EXCLUDEHIDDEN);
1374 SETFLAG(flags, F_DELETEFILES);
1377 SETFLAG(flags, F_PLAINPROMPT);
1380 printf("fdupes %s\n", VERSION);
1386 SETFLAG(flags, F_NOPROMPT);
1389 SETFLAG(flags, F_IMMEDIATE);
1392 SETFLAG(flags, F_SUMMARIZEMATCHES);
1395 SETFLAG(flags, F_PERMISSIONS);
/* --order accepts "name", "time" or "ctime", case-insensitively */
1398 if (!strcasecmp("name", optarg)) {
1399 ordertype = ORDER_NAME;
1400 } else if (!strcasecmp("time", optarg)) {
1401 ordertype = ORDER_MTIME;
1402 } else if (!strcasecmp("ctime", optarg)) {
1403 ordertype = ORDER_CTIME;
1405 errormsg("invalid value for --order: '%s'\n", optarg);
1410 SETFLAG(flags, F_REVERSE);
/* the log file is opened while parsing so that problems with it are
   reported up front */
1413 loginfo = log_open(logfile=optarg, &log_error);
1416 if (log_error == LOG_ERROR_NOT_A_LOG_FILE)
1417 errormsg("%s: doesn't look like an fdupes log file\n", logfile);
1419 errormsg("%s: could not open log file\n", logfile);
/* its device and inode let grokdir() keep the log out of the file list */
1425 if (stat(logfile, &logfile_status) != 0)
1427 errormsg("could not read log file status\n");
1433 fprintf(stderr, "Try `fdupes --help' for more information.\n");
/* at least one directory argument is required */
1438 if (optind >= argc) {
1439 errormsg("no directories specified\n");
1443 if (ISFLAG(flags, F_RECURSE) && ISFLAG(flags, F_RECURSEAFTER)) {
1444 errormsg("options --recurse and --recurse: are not compatible\n");
1448 if (ISFLAG(flags, F_SUMMARIZEMATCHES) && ISFLAG(flags, F_DELETEFILES)) {
1449 errormsg("options --summarize and --delete are not compatible\n");
/* with --recurse: (or -R), find the first directory listed after the
   option; a result of argc means none was found */
1453 if (ISFLAG(flags, F_RECURSEAFTER)) {
1454 firstrecurse = nonoptafter("--recurse:", argc, oldargv, argv, optind);
1456 if (firstrecurse == argc)
1457 firstrecurse = nonoptafter("-R", argc, oldargv, argv, optind);
1459 if (firstrecurse == argc) {
1460 errormsg("-R option must be isolated from other options\n");
1464 /* F_RECURSE is not set for directories before --recurse: */
1465 for (x = optind; x < firstrecurse; x++)
1466 filecount += grokdir(argv[x], &files, &logfile_status);
1468 /* Set F_RECURSE for directories after --recurse: */
1469 SETFLAG(flags, F_RECURSE);
1471 for (x = firstrecurse; x < argc; x++)
1472 filecount += grokdir(argv[x], &files, &logfile_status);
/* otherwise every directory argument is scanned under the same flags */
1474 for (x = optind; x < argc; x++)
1475 filecount += grokdir(argv[x], &files, &logfile_status);
/* blank out the "Building file list" progress line */
1479 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
1487 registerfile(&checktree, curfile);
1489 match = checkmatch(&checktree, checktree, curfile);
/* a signature match is only a candidate: both files are opened for the
   byte-by-byte check, and the file is passed over if either open fails */
1491 if (match != NULL) {
1492 file1 = fopen(curfile->d_name, "rb");
1494 curfile = curfile->next;
1498 file2 = fopen((*match)->d_name, "rb");
1501 curfile = curfile->next;
1505 if (confirmmatch(file1, file2)) {
/* immediate mode deletes right away; otherwise the pair is recorded.
   Both time orderings share sort_pairs_by_time, which reads sorttime */
1506 if (ISFLAG(flags, F_DELETEFILES) && ISFLAG(flags, F_IMMEDIATE))
1507 deletesuccessor(match, curfile,
1508 (ordertype == ORDER_MTIME ||
1509 ordertype == ORDER_CTIME) ? sort_pairs_by_time : sort_pairs_by_filename, loginfo );
1511 registerpair(match, curfile,
1512 (ordertype == ORDER_MTIME ||
1513 ordertype == ORDER_CTIME) ? sort_pairs_by_time : sort_pairs_by_filename );
1520 curfile = curfile->next;
1522 if (!ISFLAG(flags, F_HIDEPROGRESS)) {
1523 fprintf(stderr, "\rProgress [%d/%d] %d%% ", progress, filecount,
1524 (int)((float) progress / (float) filecount * 100.0));
1529 if (!ISFLAG(flags, F_HIDEPROGRESS)) fprintf(stderr, "\r%40s\r", " ");
/* deletion: without prompting, through the screen-mode interface, or
   through the plain line-based prompt */
1531 if (ISFLAG(flags, F_DELETEFILES))
1533 if (ISFLAG(flags, F_NOPROMPT))
1535 deletefiles(files, 0, 0, logfile);
1540 if (!ISFLAG(flags, F_PLAINPROMPT))
1542 if (newterm(getenv("TERM"), stdout, stdin) != 0)
1544 deletefiles_ncurses(files, logfile);
1548 errormsg("could not enter screen mode; falling back to plain mode\n\n");
1549 SETFLAG(flags, F_PLAINPROMPT);
/* the plain prompt reads answers from the terminal, so stdin is reopened
   on /dev/tty first */
1553 if (ISFLAG(flags, F_PLAINPROMPT))
1555 if (freopen("/dev/tty", "r", stdin) == NULL)
1557 errormsg("could not open terminal for input\n");
1561 deletefiles(files, 1, stdin, logfile);
/* same plain-prompt sequence again; presumably the variant compiled when
   the screen-mode interface is not available -- confirm */
1564 if (freopen("/dev/tty", "r", stdin) == NULL)
1566 errormsg("could not open terminal for input\n");
1570 deletefiles(files, 1, stdin, logfile);
1577 if (ISFLAG(flags, F_SUMMARIZEMATCHES))
1578 summarizematches(files);
1582 printmatches(files);
/* cleanup: release each list entry's strings and cached signatures */
1585 curfile = files->next;
1586 free(files->d_name);
1587 free(files->crcsignature);
1588 free(files->crcpartial);
1593 for (x = 0; x < argc; x++)
1598 purgetree(checktree);