check/mode-common.c

   1 /*
   2  * This program is free software; you can redistribute it and/or
   3  * modify it under the terms of the GNU General Public
   4  * License v2 as published by the Free Software Foundation.
   5  *
   6  * This program is distributed in the hope that it will be useful,
   7  * but WITHOUT ANY WARRANTY; without even the implied warranty of
   8  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   9  * General Public License for more details.
  10  *
  11  * You should have received a copy of the GNU General Public
  12  * License along with this program; if not, write to the
  13  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  14  * Boston, MA 021110-1307, USA.
  15  */
  16
  17 #include <time.h>
  18 #include "ctree.h"
  19 #include "internal.h"
  20 #include "messages.h"
  21 #include "transaction.h"
  22 #include "utils.h"
  23 #include "disk-io.h"
  24 #include "check/mode-common.h"
  25
  26 /*
  27  * Check if the inode referenced by the given data reference uses the extent
  28  * at disk_bytenr as a non-prealloc extent.
  29  *
  30  * Returns 1 if true, 0 if false and < 0 on error.
  31  */
  32 static int check_prealloc_data_ref(struct btrfs_fs_info *fs_info,
  33                                    u64 disk_bytenr,
  34                                    struct btrfs_extent_data_ref *dref,
  35                                    struct extent_buffer *eb)
  36 {
  37         u64 rootid = btrfs_extent_data_ref_root(eb, dref);
  38         u64 objectid = btrfs_extent_data_ref_objectid(eb, dref);
  39         u64 offset = btrfs_extent_data_ref_offset(eb, dref);
  40         struct btrfs_root *root;
  41         struct btrfs_key key;
  42         struct btrfs_path path;
  43         int ret;
  44
  45         btrfs_init_path(&path);
  46         key.objectid = rootid;
  47         key.type = BTRFS_ROOT_ITEM_KEY;
  48         key.offset = (u64)-1;
  49         root = btrfs_read_fs_root(fs_info, &key);
  50         if (IS_ERR(root)) {
  51                 ret = PTR_ERR(root);
  52                 goto out;
  53         }
  54
  55         key.objectid = objectid;
  56         key.type = BTRFS_EXTENT_DATA_KEY;
  57         key.offset = offset;
  58         ret = btrfs_search_slot(NULL, root, &key, &path, 0, 0);
  59         if (ret > 0) {
  60                 fprintf(stderr,
  61                 "Missing file extent item for inode %llu, root %llu, offset %llu",
  62                         objectid, rootid, offset);
  63                 ret = -ENOENT;
  64         }
  65         if (ret < 0)
  66                 goto out;
  67
  68         while (true) {
  69                 struct btrfs_file_extent_item *fi;
  70                 int extent_type;
  71
  72                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
  73                         ret = btrfs_next_leaf(root, &path);
  74                         if (ret < 0)
  75                                 goto out;
  76                         if (ret > 0)
  77                                 break;
  78                 }
  79
  80                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
  81                 if (key.objectid != objectid ||
  82                     key.type != BTRFS_EXTENT_DATA_KEY)
  83                         break;
  84
  85                 fi = btrfs_item_ptr(path.nodes[0], path.slots[0],
  86                                     struct btrfs_file_extent_item);
  87                 extent_type = btrfs_file_extent_type(path.nodes[0], fi);
  88                 if (extent_type != BTRFS_FILE_EXTENT_REG &&
  89                     extent_type != BTRFS_FILE_EXTENT_PREALLOC)
  90                         goto next;
  91
  92                 if (btrfs_file_extent_disk_bytenr(path.nodes[0], fi) !=
  93                     disk_bytenr)
  94                         break;
  95
  96                 if (extent_type == BTRFS_FILE_EXTENT_REG) {
  97                         ret = 1;
  98                         goto out;
  99                 }
 100 next:
 101                 path.slots[0]++;
 102         }
 103         ret = 0;
 104  out:
 105         btrfs_release_path(&path);
 106         return ret;
 107 }
 108
 109 /*
 110  * Check if a shared data reference points to a node that has a file extent item
 111  * pointing to the extent at @disk_bytenr that is not of type prealloc.
 112  *
 113  * Returns 1 if true, 0 if false and < 0 on error.
 114  */
 115 static int check_prealloc_shared_data_ref(struct btrfs_fs_info *fs_info,
 116                                           u64 parent, u64 disk_bytenr)
 117 {
 118         struct extent_buffer *eb;
 119         u32 nr;
 120         int i;
 121         int ret = 0;
 122
 123         eb = read_tree_block(fs_info, parent, 0);
 124         if (!extent_buffer_uptodate(eb)) {
 125                 ret = -EIO;
 126                 goto out;
 127         }
 128
 129         nr = btrfs_header_nritems(eb);
 130         for (i = 0; i < nr; i++) {
 131                 struct btrfs_key key;
 132                 struct btrfs_file_extent_item *fi;
 133                 int extent_type;
 134
 135                 btrfs_item_key_to_cpu(eb, &key, i);
 136                 if (key.type != BTRFS_EXTENT_DATA_KEY)
 137                         continue;
 138
 139                 fi = btrfs_item_ptr(eb, i, struct btrfs_file_extent_item);
 140                 extent_type = btrfs_file_extent_type(eb, fi);
 141                 if (extent_type != BTRFS_FILE_EXTENT_REG &&
 142                     extent_type != BTRFS_FILE_EXTENT_PREALLOC)
 143                         continue;
 144
 145                 if (btrfs_file_extent_disk_bytenr(eb, fi) == disk_bytenr &&
 146                     extent_type == BTRFS_FILE_EXTENT_REG) {
 147                         ret = 1;
 148                         break;
 149                 }
 150         }
 151  out:
 152         free_extent_buffer(eb);
 153         return ret;
 154 }
 155
 156 /*
 157  * Check if a prealloc extent is shared by multiple inodes and if any inode has
 158  * already written to that extent. This is to avoid emitting invalid warnings
 159  * about odd csum items (a inode has an extent entirely marked as prealloc
 160  * but another inode shares it and has already written to it).
 161  *
 162  * Note: right now it does not check if the number of checksum items in the
 163  * csum tree matches the number of bytes written into the ex-prealloc extent.
 164  * It's complex to deal with that because the prealloc extent might have been
 165  * partially written through multiple inodes and we would have to keep track of
 166  * ranges, merging them and notice ranges that fully or partially overlap, to
 167  * avoid false reports of csum items missing for areas of the prealloc extent
 168  * that were not written to - for example if we have a 1M prealloc extent, we
 169  * can have only the first half of it written, but 2 different inodes refer to
 170  * the its first half (through reflinks/cloning), so keeping a counter of bytes
 171  * covered by checksum items is not enough, as the correct value would be 512K
 172  * and not 1M (whence the need to track ranges).
 173  *
 174  * Returns 0 if the prealloc extent was not written yet by any inode, 1 if
 175  * at least one other inode has written to it, and < 0 on error.
 176  */
 177 int check_prealloc_extent_written(struct btrfs_fs_info *fs_info,
 178                                   u64 disk_bytenr, u64 num_bytes)
 179 {
 180         struct btrfs_path path;
 181         struct btrfs_key key;
 182         int ret;
 183         struct btrfs_extent_item *ei;
 184         u32 item_size;
 185         unsigned long ptr;
 186         unsigned long end;
 187
 188         key.objectid = disk_bytenr;
 189         key.type = BTRFS_EXTENT_ITEM_KEY;
 190         key.offset = num_bytes;
 191
 192         btrfs_init_path(&path);
 193         ret = btrfs_search_slot(NULL, fs_info->extent_root, &key, &path, 0, 0);
 194         if (ret > 0) {
 195                 fprintf(stderr,
 196         "Missing extent item in extent tree for disk_bytenr %llu, num_bytes %llu\n",
 197                         disk_bytenr, num_bytes);
 198                 ret = -ENOENT;
 199         }
 200         if (ret < 0)
 201                 goto out;
 202
 203         /* First check all inline refs. */
 204         ei = btrfs_item_ptr(path.nodes[0], path.slots[0],
 205                             struct btrfs_extent_item);
 206         item_size = btrfs_item_size_nr(path.nodes[0], path.slots[0]);
 207         ptr = (unsigned long)(ei + 1);
 208         end = (unsigned long)ei + item_size;
 209         while (ptr < end) {
 210                 struct btrfs_extent_inline_ref *iref;
 211                 int type;
 212
 213                 iref = (struct btrfs_extent_inline_ref *)ptr;
 214                 type = btrfs_extent_inline_ref_type(path.nodes[0], iref);
 215                 ASSERT(type == BTRFS_EXTENT_DATA_REF_KEY ||
 216                        type == BTRFS_SHARED_DATA_REF_KEY);
 217
 218                 if (type == BTRFS_EXTENT_DATA_REF_KEY) {
 219                         struct btrfs_extent_data_ref *dref;
 220
 221                         dref = (struct btrfs_extent_data_ref *)(&iref->offset);
 222                         ret = check_prealloc_data_ref(fs_info, disk_bytenr,
 223                                                       dref, path.nodes[0]);
 224                         if (ret != 0)
 225                                 goto out;
 226                 } else if (type == BTRFS_SHARED_DATA_REF_KEY) {
 227                         u64 parent;
 228
 229                         parent = btrfs_extent_inline_ref_offset(path.nodes[0],
 230                                                                 iref);
 231                         ret = check_prealloc_shared_data_ref(fs_info,
 232                                                              parent,
 233                                                              disk_bytenr);
 234                         if (ret != 0)
 235                                 goto out;
 236                 }
 237
 238                 ptr += btrfs_extent_inline_ref_size(type);
 239         }
 240
 241         /* Now check if there are any non-inlined refs. */
 242         path.slots[0]++;
 243         while (true) {
 244                 if (path.slots[0] >= btrfs_header_nritems(path.nodes[0])) {
 245                         ret = btrfs_next_leaf(fs_info->extent_root, &path);
 246                         if (ret < 0)
 247                                 goto out;
 248                         if (ret > 0) {
 249                                 ret = 0;
 250                                 break;
 251                         }
 252                 }
 253
 254                 btrfs_item_key_to_cpu(path.nodes[0], &key, path.slots[0]);
 255                 if (key.objectid != disk_bytenr)
 256                         break;
 257
 258                 if (key.type == BTRFS_EXTENT_DATA_REF_KEY) {
 259                         struct btrfs_extent_data_ref *dref;
 260
 261                         dref = btrfs_item_ptr(path.nodes[0], path.slots[0],
 262                                               struct btrfs_extent_data_ref);
 263                         ret = check_prealloc_data_ref(fs_info, disk_bytenr,
 264                                                       dref, path.nodes[0]);
 265                         if (ret != 0)
 266                                 goto out;
 267                 } else if (key.type == BTRFS_SHARED_DATA_REF_KEY) {
 268                         ret = check_prealloc_shared_data_ref(fs_info,
 269                                                              key.offset,
 270                                                              disk_bytenr);
 271                         if (ret != 0)
 272                                 goto out;
 273                 }
 274
 275                 path.slots[0]++;
 276         }
 277 out:
 278         btrfs_release_path(&path);
 279         return ret;
 280 }
 281
 282 /*
 283  * Search in csum tree to find how many bytes of range [@start, @start + @len)
 284  * has the corresponding csum item.
 285  *
 286  * @start:      range start
 287  * @len:        range length
 288  * @found:      return value of found csum bytes
 289  *              unit is BYTE.
 290  */
 291 int count_csum_range(struct btrfs_fs_info *fs_info, u64 start,
 292                      u64 len, u64 *found)
 293 {
 294         struct btrfs_key key;
 295         struct btrfs_path path;
 296         struct extent_buffer *leaf;
 297         int ret;
 298         size_t size;
 299         *found = 0;
 300         u64 csum_end;
 301         u16 csum_size = btrfs_super_csum_size(fs_info->super_copy);
 302
 303         btrfs_init_path(&path);
 304
 305         key.objectid = BTRFS_EXTENT_CSUM_OBJECTID;
 306         key.offset = start;
 307         key.type = BTRFS_EXTENT_CSUM_KEY;
 308
 309         ret = btrfs_search_slot(NULL, fs_info->csum_root,
 310                                 &key, &path, 0, 0);
 311         if (ret < 0)
 312                 goto out;
 313         if (ret > 0 && path.slots[0] > 0) {
 314                 leaf = path.nodes[0];
 315                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0] - 1);
 316                 if (key.objectid == BTRFS_EXTENT_CSUM_OBJECTID &&
 317                     key.type == BTRFS_EXTENT_CSUM_KEY)
 318                         path.slots[0]--;
 319         }
 320
 321         while (len > 0) {
 322                 leaf = path.nodes[0];
 323                 if (path.slots[0] >= btrfs_header_nritems(leaf)) {
 324                         ret = btrfs_next_leaf(fs_info->csum_root, &path);
 325                         if (ret > 0)
 326                                 break;
 327                         else if (ret < 0)
 328                                 goto out;
 329                         leaf = path.nodes[0];
 330                 }
 331
 332                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
 333                 if (key.objectid != BTRFS_EXTENT_CSUM_OBJECTID ||
 334                     key.type != BTRFS_EXTENT_CSUM_KEY)
 335                         break;
 336
 337                 btrfs_item_key_to_cpu(leaf, &key, path.slots[0]);
 338                 if (key.offset >= start + len)
 339                         break;
 340
 341                 if (key.offset > start)
 342                         start = key.offset;
 343
 344                 size = btrfs_item_size_nr(leaf, path.slots[0]);
 345                 csum_end = key.offset + (size / csum_size) *
 346                            fs_info->sectorsize;
 347                 if (csum_end > start) {
 348                         size = min(csum_end - start, len);
 349                         len -= size;
 350                         start += size;
 351                         *found += size;
 352                 }
 353
 354                 path.slots[0]++;
 355         }
 356 out:
 357         btrfs_release_path(&path);
 358         if (ret < 0)
 359                 return ret;
 360         return 0;
 361 }
 362
 363 /*
 364  * Wrapper to insert one inode item into given @root
 365  * Timestamp will be set to current time.
 366  *
 367  * @root:       the root to insert inode item into
 368  * @ino:        inode number
 369  * @size:       inode size
 370  * @nbytes:     nbytes (real used size, without hole)
 371  * @nlink:      number of links
 372  * @mode:       file mode, including S_IF* bits
 373  */
 374 int insert_inode_item(struct btrfs_trans_handle *trans,
 375                       struct btrfs_root *root, u64 ino, u64 size,
 376                       u64 nbytes, u64 nlink, u32 mode)
 377 {
 378         struct btrfs_inode_item ii;
 379         time_t now = time(NULL);
 380         int ret;
 381
 382         btrfs_set_stack_inode_size(&ii, size);
 383         btrfs_set_stack_inode_nbytes(&ii, nbytes);
 384         btrfs_set_stack_inode_nlink(&ii, nlink);
 385         btrfs_set_stack_inode_mode(&ii, mode);
 386         btrfs_set_stack_inode_generation(&ii, trans->transid);
 387         btrfs_set_stack_timespec_nsec(&ii.atime, 0);
 388         btrfs_set_stack_timespec_sec(&ii.ctime, now);
 389         btrfs_set_stack_timespec_nsec(&ii.ctime, 0);
 390         btrfs_set_stack_timespec_sec(&ii.mtime, now);
 391         btrfs_set_stack_timespec_nsec(&ii.mtime, 0);
 392         btrfs_set_stack_timespec_sec(&ii.otime, 0);
 393         btrfs_set_stack_timespec_nsec(&ii.otime, 0);
 394
 395         ret = btrfs_insert_inode(trans, root, ino, &ii);
 396         ASSERT(!ret);
 397
 398         warning("root %llu inode %llu recreating inode item, this may "
 399                 "be incomplete, please check permissions and content after "
 400                 "the fsck completes.\n", (unsigned long long)root->objectid,
 401                 (unsigned long long)ino);
 402
 403         return 0;
 404 }
 405
 406 static int get_highest_inode(struct btrfs_trans_handle *trans,
 407                              struct btrfs_root *root, struct btrfs_path *path,
 408                              u64 *highest_ino)
 409 {
 410         struct btrfs_key key, found_key;
 411         int ret;
 412
 413         btrfs_init_path(path);
 414         key.objectid = BTRFS_LAST_FREE_OBJECTID;
 415         key.offset = -1;
 416         key.type = BTRFS_INODE_ITEM_KEY;
 417         ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
 418         if (ret == 1) {
 419                 btrfs_item_key_to_cpu(path->nodes[0], &found_key,
 420                                 path->slots[0] - 1);
 421                 *highest_ino = found_key.objectid;
 422                 ret = 0;
 423         }
 424         if (*highest_ino >= BTRFS_LAST_FREE_OBJECTID)
 425                 ret = -EOVERFLOW;
 426         btrfs_release_path(path);
 427         return ret;
 428 }
 429
 430 /*
 431  * Link inode to dir 'lost+found'. Increase @ref_count.
 432  *
 433  * Returns 0 means success.
 434  * Returns <0 means failure.
 435  */
 436 int link_inode_to_lostfound(struct btrfs_trans_handle *trans,
 437                             struct btrfs_root *root,
 438                             struct btrfs_path *path,
 439                             u64 ino, char *namebuf, u32 name_len,
 440                             u8 filetype, u64 *ref_count)
 441 {
 442         char *dir_name = "lost+found";
 443         u64 lost_found_ino;
 444         int ret;
 445         u32 mode = 0700;
 446
 447         btrfs_release_path(path);
 448         ret = get_highest_inode(trans, root, path, &lost_found_ino);
 449         if (ret < 0)
 450                 goto out;
 451         lost_found_ino++;
 452
 453         ret = btrfs_mkdir(trans, root, dir_name, strlen(dir_name),
 454                           BTRFS_FIRST_FREE_OBJECTID, &lost_found_ino,
 455                           mode);
 456         if (ret < 0) {
 457                 error("failed to create '%s' dir: %s", dir_name, strerror(-ret));
 458                 goto out;
 459         }
 460         ret = btrfs_add_link(trans, root, ino, lost_found_ino,
 461                              namebuf, name_len, filetype, NULL, 1, 0);
 462         /*
 463          * Add ".INO" suffix several times to handle case where
 464          * "FILENAME.INO" is already taken by another file.
 465          */
 466         while (ret == -EEXIST) {
 467                 /*
 468                  * Conflicting file name, add ".INO" as suffix * +1 for '.'
 469                  */
 470                 if (name_len + count_digits(ino) + 1 > BTRFS_NAME_LEN) {
 471                         ret = -EFBIG;
 472                         goto out;
 473                 }
 474                 snprintf(namebuf + name_len, BTRFS_NAME_LEN - name_len,
 475                          ".%llu", ino);
 476                 name_len += count_digits(ino) + 1;
 477                 ret = btrfs_add_link(trans, root, ino, lost_found_ino, namebuf,
 478                                      name_len, filetype, NULL, 1, 0);
 479         }
 480         if (ret < 0) {
 481                 error("failed to link the inode %llu to %s dir: %s",
 482                       ino, dir_name, strerror(-ret));
 483                 goto out;
 484         }
 485
 486         ++*ref_count;
 487         printf("Moving file '%.*s' to '%s' dir since it has no valid backref\n",
 488                name_len, namebuf, dir_name);
 489 out:
 490         btrfs_release_path(path);
 491         if (ret)
 492                 error("failed to move file '%.*s' to '%s' dir", name_len,
 493                                 namebuf, dir_name);
 494         return ret;
 495 }
 496
 497 /*
 498  * Extra (optional) check for dev_item size to report possbile problem on a new
 499  * kernel.
 500  */
 501 void check_dev_size_alignment(u64 devid, u64 total_bytes, u32 sectorsize)
 502 {
 503         if (!IS_ALIGNED(total_bytes, sectorsize)) {
 504                 warning(
 505 "unaligned total_bytes detected for devid %llu, have %llu should be aligned to %u",
 506                         devid, total_bytes, sectorsize);
 507                 warning(
 508 "this is OK for older kernel, but may cause kernel warning for newer kernels");
 509                 warning("this can be fixed by 'btrfs rescue fix-device-size'");
 510         }
 511 }
 512
 513 void reada_walk_down(struct btrfs_root *root, struct extent_buffer *node,
 514                      int slot)
 515 {
 516         struct btrfs_fs_info *fs_info = root->fs_info;
 517         u64 bytenr;
 518         u64 ptr_gen;
 519         u32 nritems;
 520         int i;
 521         int level;
 522
 523         level = btrfs_header_level(node);
 524         if (level != 1)
 525                 return;
 526
 527         nritems = btrfs_header_nritems(node);
 528         for (i = slot; i < nritems; i++) {
 529                 bytenr = btrfs_node_blockptr(node, i);
 530                 ptr_gen = btrfs_node_ptr_generation(node, i);
 531                 readahead_tree_block(fs_info, bytenr, ptr_gen);
 532         }
 533 }
 534
 535 /*
 536  * Check the child node/leaf by the following condition:
 537  * 1. the first item key of the node/leaf should be the same with the one
 538  *    in parent.
 539  * 2. block in parent node should match the child node/leaf.
 540  * 3. generation of parent node and child's header should be consistent.
 541  *
 542  * Or the child node/leaf pointed by the key in parent is not valid.
 543  *
 544  * We hope to check leaf owner too, but since subvol may share leaves,
 545  * which makes leaf owner check not so strong, key check should be
 546  * sufficient enough for that case.
 547  */
 548 int check_child_node(struct extent_buffer *parent, int slot,
 549                      struct extent_buffer *child)
 550 {
 551         struct btrfs_key parent_key;
 552         struct btrfs_key child_key;
 553         int ret = 0;
 554
 555         btrfs_node_key_to_cpu(parent, &parent_key, slot);
 556         if (btrfs_header_level(child) == 0)
 557                 btrfs_item_key_to_cpu(child, &child_key, 0);
 558         else
 559                 btrfs_node_key_to_cpu(child, &child_key, 0);
 560
 561         if (memcmp(&parent_key, &child_key, sizeof(parent_key))) {
 562                 ret = -EINVAL;
 563                 fprintf(stderr,
 564                         "Wrong key of child node/leaf, wanted: (%llu, %u, %llu), have: (%llu, %u, %llu)\n",
 565                         parent_key.objectid, parent_key.type, parent_key.offset,
 566                         child_key.objectid, child_key.type, child_key.offset);
 567         }
 568         if (btrfs_header_bytenr(child) != btrfs_node_blockptr(parent, slot)) {
 569                 ret = -EINVAL;
 570                 fprintf(stderr, "Wrong block of child node/leaf, wanted: %llu, have: %llu\n",
 571                         btrfs_node_blockptr(parent, slot),
 572                         btrfs_header_bytenr(child));
 573         }
 574         if (btrfs_node_ptr_generation(parent, slot) !=
 575             btrfs_header_generation(child)) {
 576                 ret = -EINVAL;
 577                 fprintf(stderr, "Wrong generation of child node/leaf, wanted: %llu, have: %llu\n",
 578                         btrfs_header_generation(child),
 579                         btrfs_node_ptr_generation(parent, slot));
 580         }
 581         return ret;
 582 }
 583
 584 void reset_cached_block_groups(struct btrfs_fs_info *fs_info)
 585 {
 586         struct btrfs_block_group_cache *cache;
 587         u64 start, end;
 588         int ret;
 589
 590         while (1) {
 591                 ret = find_first_extent_bit(&fs_info->free_space_cache, 0,
 592                                             &start, &end, EXTENT_DIRTY);
 593                 if (ret)
 594                         break;
 595                 clear_extent_dirty(&fs_info->free_space_cache, start, end);
 596         }
 597
 598         start = 0;
 599         while (1) {
 600                 cache = btrfs_lookup_first_block_group(fs_info, start);
 601                 if (!cache)
 602                         break;
 603                 if (cache->cached)
 604                         cache->cached = 0;
 605                 start = cache->key.objectid + cache->key.offset;
 606         }
 607 }