btrfs-progs: fix check of running scrub
[platform/upstream/btrfs-progs.git] / cmds-scrub.c
index 9179166..06c2a30 100644 (file)
@@ -59,6 +59,7 @@ struct scrub_stats {
        u64 duration;
        u64 finished;
        u64 canceled;
+       int in_progress;
 };
 
 /* TBD: replace with #include "linux/ioprio.h" in some years */
@@ -251,7 +252,11 @@ static void _print_scrub_ss(struct scrub_stats *ss)
                printf(" and was aborted after %llu seconds\n",
                       ss->duration);
        } else {
-               printf(", running for %llu seconds\n", ss->duration);
+               if (ss->in_progress)
+                       printf(", running for %llu seconds\n", ss->duration);
+               else
+                       printf(", interrupted after %llu seconds, not running\n",
+                                       ss->duration);
        }
 }
 
@@ -285,7 +290,7 @@ static void print_fs_stat(struct scrub_fs_stat *fs_stat, int raw)
 static void free_history(struct scrub_file_record **last_scrubs)
 {
        struct scrub_file_record **l = last_scrubs;
-       if (!l)
+       if (!l || IS_ERR(l))
                return;
        while (*l)
                free(*l++);
@@ -474,9 +479,6 @@ static struct scrub_file_record **scrub_read_file(int fd, int report_errors)
        char empty_uuid[BTRFS_FSID_SIZE] = {0};
        struct scrub_file_record **p = NULL;
 
-       if (fd < 0)
-               return ERR_PTR(-EINVAL);
-
 again:
        old_avail = avail - i;
        BUG_ON(old_avail < 0);
@@ -553,7 +555,7 @@ again:
                                ;
                        if (i + j + 1 >= avail)
                                _SCRUB_INVALID;
-                       if (j != 36)
+                       if (j != BTRFS_UUID_UNPARSED_SIZE - 1)
                                _SCRUB_INVALID;
                        l[i + j] = '\0';
                        ret = uuid_parse(l + i, p[curr]->fsid);
@@ -773,31 +775,31 @@ static int scrub_write_progress(pthread_mutex_t *m, const char *fsid,
        int fd = -1;
        int old;
 
-       ret = pthread_mutex_lock(m);
+       ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old);
        if (ret) {
                err = -ret;
-               goto out;
+               goto out3;
        }
 
-       ret = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &old);
+       ret = pthread_mutex_lock(m);
        if (ret) {
                err = -ret;
-               goto out;
+               goto out2;
        }
 
        fd = scrub_open_file_w(SCRUB_DATA_FILE, fsid, "tmp");
        if (fd < 0) {
                err = fd;
-               goto out;
+               goto out1;
        }
        err = scrub_write_file(fd, fsid, data, n);
        if (err)
-               goto out;
+               goto out1;
        err = scrub_rename_file(SCRUB_DATA_FILE, fsid, "tmp");
        if (err)
-               goto out;
+               goto out1;
 
-out:
+out1:
        if (fd >= 0) {
                ret = close(fd);
                if (ret)
@@ -808,10 +810,12 @@ out:
        if (ret && !err)
                err = -ret;
 
+out2:
        ret = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old);
        if (ret && !err)
                err = -ret;
 
+out3:
        return err;
 }
 
@@ -863,11 +867,11 @@ static void *progress_one_dev(void *ctx)
 /* nb: returns a negative errno via ERR_PTR */
 static void *scrub_progress_cycle(void *ctx)
 {
-       int ret;
+       int ret = 0;
        int  perr = 0;  /* positive / pthread error returns */
        int old;
        int i;
-       char fsid[37];
+       char fsid[BTRFS_UUID_UNPARSED_SIZE];
        struct scrub_progress *sp;
        struct scrub_progress *sp_last;
        struct scrub_progress *sp_shared;
@@ -943,6 +947,10 @@ static void *scrub_progress_cycle(void *ctx)
                         * result we got for the current write and go
                         * on. flag should be set on next cycle, then.
                         */
+                       perr = pthread_setcancelstate(
+                                       PTHREAD_CANCEL_DISABLE, &old);
+                       if (perr)
+                               goto out;
                        perr = pthread_mutex_lock(&sp_shared->progress_mutex);
                        if (perr)
                                goto out;
@@ -951,12 +959,20 @@ static void *scrub_progress_cycle(void *ctx)
                                                &sp_shared->progress_mutex);
                                if (perr)
                                        goto out;
+                               perr = pthread_setcancelstate(
+                                               PTHREAD_CANCEL_ENABLE, &old);
+                               if (perr)
+                                       goto out;
                                memcpy(sp, sp_last, sizeof(*sp));
                                continue;
                        }
                        perr = pthread_mutex_unlock(&sp_shared->progress_mutex);
                        if (perr)
                                goto out;
+                       perr = pthread_setcancelstate(
+                                       PTHREAD_CANCEL_ENABLE, &old);
+                       if (perr)
+                               goto out;
                        memcpy(sp, sp_shared, sizeof(*sp));
                        memcpy(sp_last, sp_shared, sizeof(*sp));
                }
@@ -1007,7 +1023,7 @@ static struct scrub_file_record *last_dev_scrub(
        return NULL;
 }
 
-int mkdir_p(char *path)
+static int mkdir_p(char *path)
 {
        int i;
        int ret;
@@ -1018,7 +1034,7 @@ int mkdir_p(char *path)
                path[i] = '\0';
                ret = mkdir(path, 0777);
                if (ret && errno != EEXIST)
-                       return 1;
+                       return -errno;
                path[i] = '/';
        }
 
@@ -1046,6 +1062,24 @@ static int is_scrub_running_on_fs(struct btrfs_ioctl_fs_info_args *fi_args,
        return 0;
 }
 
+/* Return 1 if SCRUB_PROGRESS succeeds for any of max_devices devs, else 0. */
+static int is_scrub_running_in_kernel(int fd,
+               struct btrfs_ioctl_dev_info_args *di_args, u64 max_devices)
+{
+       struct scrub_progress sp;
+       u64 i;  /* same type as max_devices: no signed/unsigned compare */
+
+       for (i = 0; i < max_devices; i++) {
+               memset(&sp, 0, sizeof(sp));
+               sp.scrub_args.devid = di_args[i].devid;
+               /* any ioctl failure counts as "not running" on this device */
+               if (!ioctl(fd, BTRFS_IOC_SCRUB_PROGRESS, &sp.scrub_args))
+                       return 1;
+       }
+
+       return 0;
+}
+
 static const char * const cmd_scrub_start_usage[];
 static const char * const cmd_scrub_resume_usage[];
 
@@ -1085,20 +1119,21 @@ static int scrub_start(int argc, char **argv, int resume)
        };
        pthread_t *t_devs = NULL;
        pthread_t t_prog;
-       pthread_attr_t t_attr;
        struct scrub_file_record **past_scrubs = NULL;
        struct scrub_file_record *last_scrub = NULL;
        char *datafile = strdup(SCRUB_DATA_FILE);
-       char fsid[37];
+       char fsid[BTRFS_UUID_UNPARSED_SIZE];
        char sock_path[BTRFS_PATH_NAME_MAX + 1] = "";
        struct scrub_progress_cycle spc;
        pthread_mutex_t spc_write_mutex = PTHREAD_MUTEX_INITIALIZER;
        void *terr;
        u64 devid;
        DIR *dirstream = NULL;
+       int force = 0;
+       int nothing_to_resume = 0;
 
        optind = 1;
-       while ((c = getopt(argc, argv, "BdqrRc:n:")) != -1) {
+       while ((c = getopt(argc, argv, "BdqrRc:n:f")) != -1) {
                switch (c) {
                case 'B':
                        do_background = 0;
@@ -1123,6 +1158,9 @@ static int scrub_start(int argc, char **argv, int resume)
                case 'n':
                        ioprio_classdata = (int)strtol(optarg, NULL, 10);
                        break;
+               case 'f':
+                       force = 1;
+                       break;
                case '?':
                default:
                        usage(resume ? cmd_scrub_resume_usage :
@@ -1154,8 +1192,14 @@ static int scrub_start(int argc, char **argv, int resume)
        fdmnt = open_path_or_dev_mnt(path, &dirstream);
 
        if (fdmnt < 0) {
-               ERR(!do_quiet, "ERROR: can't access '%s'\n", path);
-               return 12;
+               if (errno == EINVAL)
+                       ERR(!do_quiet,
+                           "ERROR: '%s' is not a mounted btrfs device\n",
+                           path);
+               else
+                       ERR(!do_quiet, "ERROR: can't access '%s': %s\n",
+                           path, strerror(errno));
+               return 1;
        }
 
        ret = get_fs_info(path, &fi_args, &di_args);
@@ -1185,6 +1229,13 @@ static int scrub_start(int argc, char **argv, int resume)
        }
 
        /*
+        * Check for stale information in the status file, i.e. if it says
+        * canceled=0, finished=0 but no scrub is running.
+        */
+       if (!is_scrub_running_in_kernel(fdmnt, di_args, fi_args.num_devices))
+               force = 1;
+
+       /*
         * check whether any involved device is already busy running a
         * scrub. This would cause damaged status messages and the state
         * "aborted" without the explanation that a scrub was already
@@ -1195,7 +1246,7 @@ static int scrub_start(int argc, char **argv, int resume)
         * is a normal mode of operation to start scrub on multiple
         * single devices, there is no reason to prevent this.
         */
-       if (is_scrub_running_on_fs(&fi_args, di_args, past_scrubs)) {
+       if (!force && is_scrub_running_on_fs(&fi_args, di_args, past_scrubs)) {
                ERR(!do_quiet,
                    "ERROR: scrub is already running.\n"
                    "To cancel use 'btrfs scrub cancel %s'.\n"
@@ -1215,14 +1266,6 @@ static int scrub_start(int argc, char **argv, int resume)
                goto out;
        }
 
-       ret = pthread_attr_init(&t_attr);
-       if (ret) {
-               ERR(!do_quiet, "ERROR: pthread_attr_init failed: %s\n",
-                   strerror(ret));
-               err = 1;
-               goto out;
-       }
-
        for (i = 0; i < fi_args.num_devices; ++i) {
                devid = di_args[i].devid;
                ret = pthread_mutex_init(&sp[i].progress_mutex, NULL);
@@ -1261,7 +1304,7 @@ static int scrub_start(int argc, char **argv, int resume)
                if (!do_quiet)
                        printf("scrub: nothing to resume for %s, fsid %s\n",
                               path, fsid);
-               err = 0;
+               nothing_to_resume = 1;
                goto out;
        }
 
@@ -1370,7 +1413,7 @@ static int scrub_start(int argc, char **argv, int resume)
                devid = di_args[i].devid;
                gettimeofday(&tv, NULL);
                sp[i].stats.t_start = tv.tv_sec;
-               ret = pthread_create(&t_devs[i], &t_attr,
+               ret = pthread_create(&t_devs[i], NULL,
                                        scrub_one_dev, &sp[i]);
                if (ret) {
                        if (do_print)
@@ -1388,7 +1431,7 @@ static int scrub_start(int argc, char **argv, int resume)
        spc.write_mutex = &spc_write_mutex;
        spc.shared_progress = sp;
        spc.fi = &fi_args;
-       ret = pthread_create(&t_prog, &t_attr, scrub_progress_cycle, &spc);
+       ret = pthread_create(&t_prog, NULL, scrub_progress_cycle, &spc);
        if (ret) {
                if (do_print)
                        fprintf(stderr, "ERROR: creating progress thread "
@@ -1500,24 +1543,31 @@ out:
 
        if (err)
                return 1;
+       if (nothing_to_resume)
+               return 2;
+       if (e_uncorrectable) {
+               ERR(!do_quiet, "ERROR: There are uncorrectable errors.\n");
+               return 3;
+       }
        if (e_correctable)
-               return 7;
-       if (e_uncorrectable)
-               return 8;
+               ERR(!do_quiet, "WARNING: errors detected during scrubbing, corrected.\n");
+
        return 0;
 }
 
 static const char * const cmd_scrub_start_usage[] = {
-       "btrfs scrub start [-BdqrR] [-c ioprio_class -n ioprio_classdata] <path>|<device>",
-       "Start a new scrub",
+       "btrfs scrub start [-BdqrRf] [-c ioprio_class -n ioprio_classdata] <path>|<device>",
+       "Start a new scrub. If a scrub is already running, the new one fails.",
        "",
        "-B     do not background",
        "-d     stats per device (-B only)",
        "-q     be quiet",
        "-r     read only mode",
-       "-R     raw print mode, print full data instead of summary"
+       "-R     raw print mode, print full data instead of summary",
        "-c     set ioprio class (see ionice(1) manpage)",
        "-n     set ioprio classdata (see ionice(1) manpage)",
+       "-f     force starting new scrub even if a scrub is already running",
+       "       this is useful when scrub stats record file is damaged",
        NULL
 };
 
@@ -1546,8 +1596,13 @@ static int cmd_scrub_cancel(int argc, char **argv)
 
        fdmnt = open_path_or_dev_mnt(path, &dirstream);
        if (fdmnt < 0) {
-               fprintf(stderr, "ERROR: could not open %s: %s\n",
-                       path, strerror(errno));
+               if (errno == EINVAL)
+                       fprintf(stderr,
+                               "ERROR: '%s' is not a mounted btrfs device\n",
+                               path);
+               else
+                       fprintf(stderr, "ERROR: can't access '%s': %s\n",
+                               path, strerror(errno));
                ret = 1;
                goto out;
        }
@@ -1557,7 +1612,10 @@ static int cmd_scrub_cancel(int argc, char **argv)
        if (ret < 0) {
                fprintf(stderr, "ERROR: scrub cancel failed on %s: %s\n", path,
                        errno == ENOTCONN ? "not running" : strerror(errno));
-               ret = 1;
+               if (errno == ENOTCONN)
+                       ret = 2;
+               else
+                       ret = 1;
                goto out;
        }
 
@@ -1577,6 +1635,7 @@ static const char * const cmd_scrub_resume_usage[] = {
        "-d     stats per device (-B only)",
        "-q     be quiet",
        "-r     read only mode",
+       "-R     raw print mode, print full data instead of summary",
        "-c     set ioprio class (see ionice(1) manpage)",
        "-n     set ioprio classdata (see ionice(1) manpage)",
        NULL
@@ -1607,13 +1666,14 @@ static int cmd_scrub_status(int argc, char **argv)
        struct sockaddr_un addr = {
                .sun_family = AF_UNIX,
        };
+       int in_progress;
        int ret;
        int i;
        int fdmnt;
        int print_raw = 0;
        int do_stats_per_dev = 0;
        int c;
-       char fsid[37];
+       char fsid[BTRFS_UUID_UNPARSED_SIZE];
        int fdres = -1;
        int err = 0;
        DIR *dirstream = NULL;
@@ -1641,8 +1701,14 @@ static int cmd_scrub_status(int argc, char **argv)
        fdmnt = open_path_or_dev_mnt(path, &dirstream);
 
        if (fdmnt < 0) {
-               fprintf(stderr, "ERROR: can't access to '%s'\n", path);
-               return 12;
+               if (errno == EINVAL)
+                       fprintf(stderr,
+                               "ERROR: '%s' is not a mounted btrfs device\n",
+                               path);
+               else
+                       fprintf(stderr, "ERROR: can't access '%s': %s\n",
+                               path, strerror(errno));
+               return 1;
        }
 
        ret = get_fs_info(path, &fi_args, &di_args);
@@ -1690,6 +1756,7 @@ static int cmd_scrub_status(int argc, char **argv)
                        fprintf(stderr, "WARNING: failed to read status: %s\n",
                                strerror(-PTR_ERR(past_scrubs)));
        }
+       in_progress = is_scrub_running_in_kernel(fdmnt, di_args, fi_args.num_devices);
 
        printf("scrub status for %s\n", fsid);
 
@@ -1702,6 +1769,7 @@ static int cmd_scrub_status(int argc, char **argv)
                                                NULL, NULL);
                                continue;
                        }
+                       last_scrub->stats.in_progress = in_progress;
                        print_scrub_dev(&di_args[i], &last_scrub->p, print_raw,
                                        last_scrub->stats.finished ?
                                                        "history" : "status",
@@ -1709,6 +1777,7 @@ static int cmd_scrub_status(int argc, char **argv)
                }
        } else {
                init_fs_stat(&fs_stat);
+               fs_stat.s.in_progress = in_progress;
                for (i = 0; i < fi_args.num_devices; ++i) {
                        last_scrub = last_dev_scrub(past_scrubs,
                                                        di_args[i].devid);
@@ -1727,7 +1796,7 @@ out:
                close(fdres);
        close_file_or_dir(fdmnt, dirstream);
 
-       return err;
+       return !!err;
 }
 
 const struct cmd_group scrub_cmd_group = {
@@ -1736,7 +1805,7 @@ const struct cmd_group scrub_cmd_group = {
                { "cancel", cmd_scrub_cancel, cmd_scrub_cancel_usage, NULL, 0 },
                { "resume", cmd_scrub_resume, cmd_scrub_resume_usage, NULL, 0 },
                { "status", cmd_scrub_status, cmd_scrub_status_usage, NULL, 0 },
-               { 0, 0, 0, 0, 0 }
+               NULL_CMD_STRUCT
        }
 };