fs/locks.c

   1 // SPDX-License-Identifier: GPL-2.0-only
   2 /*
   3  *  linux/fs/locks.c
   4  *
   5  * We implement four types of file locks: BSD locks, posix locks, open
   6  * file description locks, and leases.  For details about BSD locks,
   7  * see the flock(2) man page; for details about the other three, see
   8  * fcntl(2).
   9  *
  10  *
  11  * Locking conflicts and dependencies:
  12  * If multiple threads attempt to lock the same byte (or flock the same file)
  13  * only one can be granted the lock, and the others must wait their turn.
  14  * The first lock has been "applied" or "granted", the others are "waiting"
  15  * and are "blocked" by the "applied" lock.
  16  *
  17  * Waiting and applied locks are all kept in trees whose properties are:
  18  *
  19  *      - the root of a tree may be an applied or waiting lock.
  20  *      - every other node in the tree is a waiting lock that
  21  *        conflicts with every ancestor of that node.
  22  *
  23  * Every such tree begins life as a waiting singleton which obviously
  24  * satisfies the above properties.
  25  *
  26  * The only ways we modify trees preserve these properties:
  27  *
  28  *      1. We may add a new leaf node, but only after first verifying that it
  29  *         conflicts with all of its ancestors.
  30  *      2. We may remove the root of a tree, creating a new singleton
  31  *         tree from the root and N new trees rooted in the immediate
  32  *         children.
  33  *      3. If the root of a tree is not currently an applied lock, we may
  34  *         apply it (if possible).
  35  *      4. We may upgrade the root of the tree (either extend its range,
  36  *         or upgrade its entire range from read to write).
  37  *
  38  * When an applied lock is modified in a way that reduces or downgrades any
  39  * part of its range, we remove all its children (2 above).  This particularly
  40  * happens when a lock is unlocked.
  41  *
  42  * For each of those child trees we "wake up" the thread which is
  43  * waiting for the lock so it can continue handling as follows: if the
  44  * root of the tree applies, we do so (3).  If it doesn't, it must
  45  * conflict with some applied lock.  We remove (wake up) all of its children
  46  * (2), and add it as a new leaf to the tree rooted in the applied
  47  * lock (1).  We then repeat the process recursively with those
  48  * children.
  49  *
  50  */
  51
  52 #include <linux/capability.h>
  53 #include <linux/file.h>
  54 #include <linux/fdtable.h>
  55 #include <linux/fs.h>
  56 #include <linux/init.h>
  57 #include <linux/security.h>
  58 #include <linux/slab.h>
  59 #include <linux/syscalls.h>
  60 #include <linux/time.h>
  61 #include <linux/rcupdate.h>
  62 #include <linux/pid_namespace.h>
  63 #include <linux/hashtable.h>
  64 #include <linux/percpu.h>
  65
  66 #define CREATE_TRACE_POINTS
  67 #include <trace/events/filelock.h>
  68
  69 #include <linux/uaccess.h>
  70
  71 #define IS_POSIX(fl)    (fl->fl_flags & FL_POSIX)
  72 #define IS_FLOCK(fl)    (fl->fl_flags & FL_FLOCK)
  73 #define IS_LEASE(fl)    (fl->fl_flags & (FL_LEASE|FL_DELEG|FL_LAYOUT))
  74 #define IS_OFDLCK(fl)   (fl->fl_flags & FL_OFDLCK)
  75 #define IS_REMOTELCK(fl)        (fl->fl_pid <= 0)
  76
  77 static bool lease_breaking(struct file_lock *fl)
  78 {
  79         return fl->fl_flags & (FL_UNLOCK_PENDING | FL_DOWNGRADE_PENDING);
  80 }
  81
  82 static int target_leasetype(struct file_lock *fl)
  83 {
  84         if (fl->fl_flags & FL_UNLOCK_PENDING)
  85                 return F_UNLCK;
  86         if (fl->fl_flags & FL_DOWNGRADE_PENDING)
  87                 return F_RDLCK;
  88         return fl->fl_type;
  89 }
  90
/*
 * Lease tunables with their built-in defaults.
 * NOTE(review): lease_break_time is presumably in seconds and both values are
 * presumably adjustable at runtime; their users are outside this view.
 */
int leases_enable = 1;
int lease_break_time = 45;
  93
  94 /*
  95  * The global file_lock_list is only used for displaying /proc/locks, so we
  96  * keep a list on each CPU, with each list protected by its own spinlock.
  97  * Global serialization is done using file_rwsem.
  98  *
  99  * Note that alterations to the list also require that the relevant flc_lock is
 100  * held.
 101  */
/* One instance per CPU: a list of file locks and the spinlock guarding it. */
struct file_lock_list_struct {
        spinlock_t              lock;   /* taken around every hlist update */
        struct hlist_head       hlist;  /* locks are linked in via fl_link */
};
static DEFINE_PER_CPU(struct file_lock_list_struct, file_lock_list);
/* Asserted held by the helpers that add to / remove from the per-cpu lists. */
DEFINE_STATIC_PERCPU_RWSEM(file_rwsem);
 108
 109
 110 /*
 111  * The blocked_hash is used to find POSIX lock loops for deadlock detection.
 112  * It is protected by blocked_lock_lock.
 113  *
 114  * We hash locks by lockowner in order to optimize searching for the lock a
 115  * particular lockowner is waiting on.
 116  *
 117  * FIXME: make this value scale via some heuristic? We generally will want more
 118  * buckets when we have more lockowners holding locks, but that's a little
 119  * difficult to determine without knowing what the workload will look like.
 120  */
/* Size parameter handed to DEFINE_HASHTABLE() (bits, see <linux/hashtable.h>). */
#define BLOCKED_HASH_BITS       7
static DEFINE_HASHTABLE(blocked_hash, BLOCKED_HASH_BITS);
 123
 124 /*
 125  * This lock protects the blocked_hash. Generally, if you're accessing it, you
 126  * want to be holding this lock.
 127  *
 128  * In addition, it also protects the fl->fl_blocked_requests list, and the
 129  * fl->fl_blocker pointer for file_lock structures that are acting as lock
 130  * requests (in contrast to those that are acting as records of acquired locks).
 131  *
 132  * Note that when we acquire this lock in order to change the above fields,
 133  * we often hold the flc_lock as well. In certain cases, when reading the fields
 134  * protected by this lock, we can skip acquiring it iff we already hold the
 135  * flc_lock.
 136  */
static DEFINE_SPINLOCK(blocked_lock_lock);      /* guards blocked_hash and waiter linkage */
 138
/* Slab caches: flctx_cache backs struct file_lock_context, filelock_cache backs struct file_lock. */
static struct kmem_cache *flctx_cache __read_mostly;
static struct kmem_cache *filelock_cache __read_mostly;
 141
 142 static struct file_lock_context *
 143 locks_get_lock_context(struct inode *inode, int type)
 144 {
 145         struct file_lock_context *ctx;
 146
 147         /* paired with cmpxchg() below */
 148         ctx = smp_load_acquire(&inode->i_flctx);
 149         if (likely(ctx) || type == F_UNLCK)
 150                 goto out;
 151
 152         ctx = kmem_cache_alloc(flctx_cache, GFP_KERNEL);
 153         if (!ctx)
 154                 goto out;
 155
 156         spin_lock_init(&ctx->flc_lock);
 157         INIT_LIST_HEAD(&ctx->flc_flock);
 158         INIT_LIST_HEAD(&ctx->flc_posix);
 159         INIT_LIST_HEAD(&ctx->flc_lease);
 160
 161         /*
 162          * Assign the pointer if it's not already assigned. If it is, then
 163          * free the context we just allocated.
 164          */
 165         if (cmpxchg(&inode->i_flctx, NULL, ctx)) {
 166                 kmem_cache_free(flctx_cache, ctx);
 167                 ctx = smp_load_acquire(&inode->i_flctx);
 168         }
 169 out:
 170         trace_locks_get_lock_context(inode, type, ctx);
 171         return ctx;
 172 }
 173
 174 static void
 175 locks_dump_ctx_list(struct list_head *list, char *list_type)
 176 {
 177         struct file_lock *fl;
 178
 179         list_for_each_entry(fl, list, fl_list) {
 180                 pr_warn("%s: fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n", list_type, fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
 181         }
 182 }
 183
 184 static void
 185 locks_check_ctx_lists(struct inode *inode)
 186 {
 187         struct file_lock_context *ctx = inode->i_flctx;
 188
 189         if (unlikely(!list_empty(&ctx->flc_flock) ||
 190                      !list_empty(&ctx->flc_posix) ||
 191                      !list_empty(&ctx->flc_lease))) {
 192                 pr_warn("Leaked locks on dev=0x%x:0x%x ino=0x%lx:\n",
 193                         MAJOR(inode->i_sb->s_dev), MINOR(inode->i_sb->s_dev),
 194                         inode->i_ino);
 195                 locks_dump_ctx_list(&ctx->flc_flock, "FLOCK");
 196                 locks_dump_ctx_list(&ctx->flc_posix, "POSIX");
 197                 locks_dump_ctx_list(&ctx->flc_lease, "LEASE");
 198         }
 199 }
 200
 201 static void
 202 locks_check_ctx_file_list(struct file *filp, struct list_head *list,
 203                                 char *list_type)
 204 {
 205         struct file_lock *fl;
 206         struct inode *inode = locks_inode(filp);
 207
 208         list_for_each_entry(fl, list, fl_list)
 209                 if (fl->fl_file == filp)
 210                         pr_warn("Leaked %s lock on dev=0x%x:0x%x ino=0x%lx "
 211                                 " fl_owner=%p fl_flags=0x%x fl_type=0x%x fl_pid=%u\n",
 212                                 list_type, MAJOR(inode->i_sb->s_dev),
 213                                 MINOR(inode->i_sb->s_dev), inode->i_ino,
 214                                 fl->fl_owner, fl->fl_flags, fl->fl_type, fl->fl_pid);
 215 }
 216
 217 void
 218 locks_free_lock_context(struct inode *inode)
 219 {
 220         struct file_lock_context *ctx = inode->i_flctx;
 221
 222         if (unlikely(ctx)) {
 223                 locks_check_ctx_lists(inode);
 224                 kmem_cache_free(flctx_cache, ctx);
 225         }
 226 }
 227
 228 static void locks_init_lock_heads(struct file_lock *fl)
 229 {
 230         INIT_HLIST_NODE(&fl->fl_link);
 231         INIT_LIST_HEAD(&fl->fl_list);
 232         INIT_LIST_HEAD(&fl->fl_blocked_requests);
 233         INIT_LIST_HEAD(&fl->fl_blocked_member);
 234         init_waitqueue_head(&fl->fl_wait);
 235 }
 236
 237 /* Allocate an empty lock structure. */
 238 struct file_lock *locks_alloc_lock(void)
 239 {
 240         struct file_lock *fl = kmem_cache_zalloc(filelock_cache, GFP_KERNEL);
 241
 242         if (fl)
 243                 locks_init_lock_heads(fl);
 244
 245         return fl;
 246 }
 247 EXPORT_SYMBOL_GPL(locks_alloc_lock);
 248
 249 void locks_release_private(struct file_lock *fl)
 250 {
 251         BUG_ON(waitqueue_active(&fl->fl_wait));
 252         BUG_ON(!list_empty(&fl->fl_list));
 253         BUG_ON(!list_empty(&fl->fl_blocked_requests));
 254         BUG_ON(!list_empty(&fl->fl_blocked_member));
 255         BUG_ON(!hlist_unhashed(&fl->fl_link));
 256
 257         if (fl->fl_ops) {
 258                 if (fl->fl_ops->fl_release_private)
 259                         fl->fl_ops->fl_release_private(fl);
 260                 fl->fl_ops = NULL;
 261         }
 262
 263         if (fl->fl_lmops) {
 264                 if (fl->fl_lmops->lm_put_owner) {
 265                         fl->fl_lmops->lm_put_owner(fl->fl_owner);
 266                         fl->fl_owner = NULL;
 267                 }
 268                 fl->fl_lmops = NULL;
 269         }
 270 }
 271 EXPORT_SYMBOL_GPL(locks_release_private);
 272
 273 /* Free a lock which is not in use. */
 274 void locks_free_lock(struct file_lock *fl)
 275 {
 276         locks_release_private(fl);
 277         kmem_cache_free(filelock_cache, fl);
 278 }
 279 EXPORT_SYMBOL(locks_free_lock);
 280
 281 static void
 282 locks_dispose_list(struct list_head *dispose)
 283 {
 284         struct file_lock *fl;
 285
 286         while (!list_empty(dispose)) {
 287                 fl = list_first_entry(dispose, struct file_lock, fl_list);
 288                 list_del_init(&fl->fl_list);
 289                 locks_free_lock(fl);
 290         }
 291 }
 292
 293 void locks_init_lock(struct file_lock *fl)
 294 {
 295         memset(fl, 0, sizeof(struct file_lock));
 296         locks_init_lock_heads(fl);
 297 }
 298 EXPORT_SYMBOL(locks_init_lock);
 299
 300 /*
 301  * Initialize a new lock from an existing file_lock structure.
 302  */
 303 void locks_copy_conflock(struct file_lock *new, struct file_lock *fl)
 304 {
 305         new->fl_owner = fl->fl_owner;
 306         new->fl_pid = fl->fl_pid;
 307         new->fl_file = NULL;
 308         new->fl_flags = fl->fl_flags;
 309         new->fl_type = fl->fl_type;
 310         new->fl_start = fl->fl_start;
 311         new->fl_end = fl->fl_end;
 312         new->fl_lmops = fl->fl_lmops;
 313         new->fl_ops = NULL;
 314
 315         if (fl->fl_lmops) {
 316                 if (fl->fl_lmops->lm_get_owner)
 317                         fl->fl_lmops->lm_get_owner(fl->fl_owner);
 318         }
 319 }
 320 EXPORT_SYMBOL(locks_copy_conflock);
 321
 322 void locks_copy_lock(struct file_lock *new, struct file_lock *fl)
 323 {
 324         /* "new" must be a freshly-initialized lock */
 325         WARN_ON_ONCE(new->fl_ops);
 326
 327         locks_copy_conflock(new, fl);
 328
 329         new->fl_file = fl->fl_file;
 330         new->fl_ops = fl->fl_ops;
 331
 332         if (fl->fl_ops) {
 333                 if (fl->fl_ops->fl_copy_lock)
 334                         fl->fl_ops->fl_copy_lock(new, fl);
 335         }
 336 }
 337 EXPORT_SYMBOL(locks_copy_lock);
 338
 339 static void locks_move_blocks(struct file_lock *new, struct file_lock *fl)
 340 {
 341         struct file_lock *f;
 342
 343         /*
 344          * As ctx->flc_lock is held, new requests cannot be added to
 345          * ->fl_blocked_requests, so we don't need a lock to check if it
 346          * is empty.
 347          */
 348         if (list_empty(&fl->fl_blocked_requests))
 349                 return;
 350         spin_lock(&blocked_lock_lock);
 351         list_splice_init(&fl->fl_blocked_requests, &new->fl_blocked_requests);
 352         list_for_each_entry(f, &new->fl_blocked_requests, fl_blocked_member)
 353                 f->fl_blocker = new;
 354         spin_unlock(&blocked_lock_lock);
 355 }
 356
 357 static inline int flock_translate_cmd(int cmd) {
 358         switch (cmd) {
 359         case LOCK_SH:
 360                 return F_RDLCK;
 361         case LOCK_EX:
 362                 return F_WRLCK;
 363         case LOCK_UN:
 364                 return F_UNLCK;
 365         }
 366         return -EINVAL;
 367 }
 368
 369 /* Fill in a file_lock structure with an appropriate FLOCK lock. */
 370 static struct file_lock *
 371 flock_make_lock(struct file *filp, unsigned int cmd, struct file_lock *fl)
 372 {
 373         int type = flock_translate_cmd(cmd);
 374
 375         if (type < 0)
 376                 return ERR_PTR(type);
 377
 378         if (fl == NULL) {
 379                 fl = locks_alloc_lock();
 380                 if (fl == NULL)
 381                         return ERR_PTR(-ENOMEM);
 382         } else {
 383                 locks_init_lock(fl);
 384         }
 385
 386         fl->fl_file = filp;
 387         fl->fl_owner = filp;
 388         fl->fl_pid = current->tgid;
 389         fl->fl_flags = FL_FLOCK;
 390         fl->fl_type = type;
 391         fl->fl_end = OFFSET_MAX;
 392
 393         return fl;
 394 }
 395
 396 static int assign_type(struct file_lock *fl, long type)
 397 {
 398         switch (type) {
 399         case F_RDLCK:
 400         case F_WRLCK:
 401         case F_UNLCK:
 402                 fl->fl_type = type;
 403                 break;
 404         default:
 405                 return -EINVAL;
 406         }
 407         return 0;
 408 }
 409
 410 static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl,
 411                                  struct flock64 *l)
 412 {
 413         switch (l->l_whence) {
 414         case SEEK_SET:
 415                 fl->fl_start = 0;
 416                 break;
 417         case SEEK_CUR:
 418                 fl->fl_start = filp->f_pos;
 419                 break;
 420         case SEEK_END:
 421                 fl->fl_start = i_size_read(file_inode(filp));
 422                 break;
 423         default:
 424                 return -EINVAL;
 425         }
 426         if (l->l_start > OFFSET_MAX - fl->fl_start)
 427                 return -EOVERFLOW;
 428         fl->fl_start += l->l_start;
 429         if (fl->fl_start < 0)
 430                 return -EINVAL;
 431
 432         /* POSIX-1996 leaves the case l->l_len < 0 undefined;
 433            POSIX-2001 defines it. */
 434         if (l->l_len > 0) {
 435                 if (l->l_len - 1 > OFFSET_MAX - fl->fl_start)
 436                         return -EOVERFLOW;
 437                 fl->fl_end = fl->fl_start + (l->l_len - 1);
 438
 439         } else if (l->l_len < 0) {
 440                 if (fl->fl_start + l->l_len < 0)
 441                         return -EINVAL;
 442                 fl->fl_end = fl->fl_start - 1;
 443                 fl->fl_start += l->l_len;
 444         } else
 445                 fl->fl_end = OFFSET_MAX;
 446
 447         fl->fl_owner = current->files;
 448         fl->fl_pid = current->tgid;
 449         fl->fl_file = filp;
 450         fl->fl_flags = FL_POSIX;
 451         fl->fl_ops = NULL;
 452         fl->fl_lmops = NULL;
 453
 454         return assign_type(fl, l->l_type);
 455 }
 456
 457 /* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX
 458  * style lock.
 459  */
 460 static int flock_to_posix_lock(struct file *filp, struct file_lock *fl,
 461                                struct flock *l)
 462 {
 463         struct flock64 ll = {
 464                 .l_type = l->l_type,
 465                 .l_whence = l->l_whence,
 466                 .l_start = l->l_start,
 467                 .l_len = l->l_len,
 468         };
 469
 470         return flock64_to_posix_lock(filp, fl, &ll);
 471 }
 472
 473 /* default lease lock manager operations */
 474 static bool
 475 lease_break_callback(struct file_lock *fl)
 476 {
 477         kill_fasync(&fl->fl_fasync, SIGIO, POLL_MSG);
 478         return false;
 479 }
 480
 481 static void
 482 lease_setup(struct file_lock *fl, void **priv)
 483 {
 484         struct file *filp = fl->fl_file;
 485         struct fasync_struct *fa = *priv;
 486
 487         /*
 488          * fasync_insert_entry() returns the old entry if any. If there was no
 489          * old entry, then it used "priv" and inserted it into the fasync list.
 490          * Clear the pointer to indicate that it shouldn't be freed.
 491          */
 492         if (!fasync_insert_entry(fa->fa_fd, filp, &fl->fl_fasync, fa))
 493                 *priv = NULL;
 494
 495         __f_setown(filp, task_pid(current), PIDTYPE_TGID, 0);
 496 }
 497
 498 static const struct lock_manager_operations lease_manager_ops = {
 499         .lm_break = lease_break_callback,
 500         .lm_change = lease_modify,
 501         .lm_setup = lease_setup,
 502 };
 503
 504 /*
 505  * Initialize a lease, use the default lock manager operations
 506  */
 507 static int lease_init(struct file *filp, long type, struct file_lock *fl)
 508 {
 509         if (assign_type(fl, type) != 0)
 510                 return -EINVAL;
 511
 512         fl->fl_owner = filp;
 513         fl->fl_pid = current->tgid;
 514
 515         fl->fl_file = filp;
 516         fl->fl_flags = FL_LEASE;
 517         fl->fl_start = 0;
 518         fl->fl_end = OFFSET_MAX;
 519         fl->fl_ops = NULL;
 520         fl->fl_lmops = &lease_manager_ops;
 521         return 0;
 522 }
 523
 524 /* Allocate a file_lock initialised to this type of lease */
 525 static struct file_lock *lease_alloc(struct file *filp, long type)
 526 {
 527         struct file_lock *fl = locks_alloc_lock();
 528         int error = -ENOMEM;
 529
 530         if (fl == NULL)
 531                 return ERR_PTR(error);
 532
 533         error = lease_init(filp, type, fl);
 534         if (error) {
 535                 locks_free_lock(fl);
 536                 return ERR_PTR(error);
 537         }
 538         return fl;
 539 }
 540
 541 /* Check if two locks overlap each other.
 542  */
 543 static inline int locks_overlap(struct file_lock *fl1, struct file_lock *fl2)
 544 {
 545         return ((fl1->fl_end >= fl2->fl_start) &&
 546                 (fl2->fl_end >= fl1->fl_start));
 547 }
 548
 549 /*
 550  * Check whether two locks have the same owner.
 551  */
 552 static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2)
 553 {
 554         return fl1->fl_owner == fl2->fl_owner;
 555 }
 556
 557 /* Must be called with the flc_lock held! */
 558 static void locks_insert_global_locks(struct file_lock *fl)
 559 {
 560         struct file_lock_list_struct *fll = this_cpu_ptr(&file_lock_list);
 561
 562         percpu_rwsem_assert_held(&file_rwsem);
 563
 564         spin_lock(&fll->lock);
 565         fl->fl_link_cpu = smp_processor_id();
 566         hlist_add_head(&fl->fl_link, &fll->hlist);
 567         spin_unlock(&fll->lock);
 568 }
 569
 570 /* Must be called with the flc_lock held! */
 571 static void locks_delete_global_locks(struct file_lock *fl)
 572 {
 573         struct file_lock_list_struct *fll;
 574
 575         percpu_rwsem_assert_held(&file_rwsem);
 576
 577         /*
 578          * Avoid taking lock if already unhashed. This is safe since this check
 579          * is done while holding the flc_lock, and new insertions into the list
 580          * also require that it be held.
 581          */
 582         if (hlist_unhashed(&fl->fl_link))
 583                 return;
 584
 585         fll = per_cpu_ptr(&file_lock_list, fl->fl_link_cpu);
 586         spin_lock(&fll->lock);
 587         hlist_del_init(&fl->fl_link);
 588         spin_unlock(&fll->lock);
 589 }
 590
 591 static unsigned long
 592 posix_owner_key(struct file_lock *fl)
 593 {
 594         return (unsigned long)fl->fl_owner;
 595 }
 596
 597 static void locks_insert_global_blocked(struct file_lock *waiter)
 598 {
 599         lockdep_assert_held(&blocked_lock_lock);
 600
 601         hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter));
 602 }
 603
 604 static void locks_delete_global_blocked(struct file_lock *waiter)
 605 {
 606         lockdep_assert_held(&blocked_lock_lock);
 607
 608         hash_del(&waiter->fl_link);
 609 }
 610
 611 /* Remove waiter from blocker's block list.
 612  * When blocker ends up pointing to itself then the list is empty.
 613  *
 614  * Must be called with blocked_lock_lock held.
 615  */
 616 static void __locks_delete_block(struct file_lock *waiter)
 617 {
 618         locks_delete_global_blocked(waiter);
 619         list_del_init(&waiter->fl_blocked_member);
 620 }
 621
 622 static void __locks_wake_up_blocks(struct file_lock *blocker)
 623 {
 624         while (!list_empty(&blocker->fl_blocked_requests)) {
 625                 struct file_lock *waiter;
 626
 627                 waiter = list_first_entry(&blocker->fl_blocked_requests,
 628                                           struct file_lock, fl_blocked_member);
 629                 __locks_delete_block(waiter);
 630                 if (waiter->fl_lmops && waiter->fl_lmops->lm_notify)
 631                         waiter->fl_lmops->lm_notify(waiter);
 632                 else
 633                         wake_up(&waiter->fl_wait);
 634
 635                 /*
 636                  * The setting of fl_blocker to NULL marks the "done"
 637                  * point in deleting a block. Paired with acquire at the top
 638                  * of locks_delete_block().
 639                  */
 640                 smp_store_release(&waiter->fl_blocker, NULL);
 641         }
 642 }
 643
 644 /**
 645  *      locks_delete_block - stop waiting for a file lock
 646  *      @waiter: the lock which was waiting
 647  *
 648  *      lockd/nfsd need to disconnect the lock while working on it.
 649  */
 650 int locks_delete_block(struct file_lock *waiter)
 651 {
 652         int status = -ENOENT;
 653
 654         /*
 655          * If fl_blocker is NULL, it won't be set again as this thread "owns"
 656          * the lock and is the only one that might try to claim the lock.
 657          *
 658          * We use acquire/release to manage fl_blocker so that we can
 659          * optimize away taking the blocked_lock_lock in many cases.
 660          *
 661          * The smp_load_acquire guarantees two things:
 662          *
 663          * 1/ that fl_blocked_requests can be tested locklessly. If something
 664          * was recently added to that list it must have been in a locked region
 665          * *before* the locked region when fl_blocker was set to NULL.
 666          *
 667          * 2/ that no other thread is accessing 'waiter', so it is safe to free
 668          * it.  __locks_wake_up_blocks is careful not to touch waiter after
 669          * fl_blocker is released.
 670          *
 671          * If a lockless check of fl_blocker shows it to be NULL, we know that
 672          * no new locks can be inserted into its fl_blocked_requests list, and
 673          * can avoid doing anything further if the list is empty.
 674          */
 675         if (!smp_load_acquire(&waiter->fl_blocker) &&
 676             list_empty(&waiter->fl_blocked_requests))
 677                 return status;
 678
 679         spin_lock(&blocked_lock_lock);
 680         if (waiter->fl_blocker)
 681                 status = 0;
 682         __locks_wake_up_blocks(waiter);
 683         __locks_delete_block(waiter);
 684
 685         /*
 686          * The setting of fl_blocker to NULL marks the "done" point in deleting
 687          * a block. Paired with acquire at the top of this function.
 688          */
 689         smp_store_release(&waiter->fl_blocker, NULL);
 690         spin_unlock(&blocked_lock_lock);
 691         return status;
 692 }
 693 EXPORT_SYMBOL(locks_delete_block);
 694
 695 /* Insert waiter into blocker's block list.
 696  * We use a circular list so that processes can be easily woken up in
 697  * the order they blocked. The documentation doesn't require this but
 698  * it seems like the reasonable thing to do.
 699  *
 700  * Must be called with both the flc_lock and blocked_lock_lock held. The
 701  * fl_blocked_requests list itself is protected by the blocked_lock_lock,
 702  * but by ensuring that the flc_lock is also held on insertions we can avoid
 703  * taking the blocked_lock_lock in some cases when we see that the
 704  * fl_blocked_requests list is empty.
 705  *
 706  * Rather than just adding to the list, we check for conflicts with any existing
 707  * waiters, and add beneath any waiter that blocks the new waiter.
 708  * Thus wakeups don't happen until needed.
 709  */
 710 static void __locks_insert_block(struct file_lock *blocker,
 711                                  struct file_lock *waiter,
 712                                  bool conflict(struct file_lock *,
 713                                                struct file_lock *))
 714 {
 715         struct file_lock *fl;
 716         BUG_ON(!list_empty(&waiter->fl_blocked_member));
 717
 718 new_blocker:
 719         list_for_each_entry(fl, &blocker->fl_blocked_requests, fl_blocked_member)
 720                 if (conflict(fl, waiter)) {
 721                         blocker =  fl;
 722                         goto new_blocker;
 723                 }
 724         waiter->fl_blocker = blocker;
 725         list_add_tail(&waiter->fl_blocked_member, &blocker->fl_blocked_requests);
 726         if (IS_POSIX(blocker) && !IS_OFDLCK(blocker))
 727                 locks_insert_global_blocked(waiter);
 728
 729         /* The requests in waiter->fl_blocked are known to conflict with
 730          * waiter, but might not conflict with blocker, or the requests
 731          * and lock which block it.  So they all need to be woken.
 732          */
 733         __locks_wake_up_blocks(waiter);
 734 }
 735
 736 /* Must be called with flc_lock held. */
 737 static void locks_insert_block(struct file_lock *blocker,
 738                                struct file_lock *waiter,
 739                                bool conflict(struct file_lock *,
 740                                              struct file_lock *))
 741 {
 742         spin_lock(&blocked_lock_lock);
 743         __locks_insert_block(blocker, waiter, conflict);
 744         spin_unlock(&blocked_lock_lock);
 745 }
 746
 747 /*
 748  * Wake up processes blocked waiting for blocker.
 749  *
 750  * Must be called with the inode->flc_lock held!
 751  */
 752 static void locks_wake_up_blocks(struct file_lock *blocker)
 753 {
 754         /*
 755          * Avoid taking global lock if list is empty. This is safe since new
 756          * blocked requests are only added to the list under the flc_lock, and
 757          * the flc_lock is always held here. Note that removal from the
 758          * fl_blocked_requests list does not require the flc_lock, so we must
 759          * recheck list_empty() after acquiring the blocked_lock_lock.
 760          */
 761         if (list_empty(&blocker->fl_blocked_requests))
 762                 return;
 763
 764         spin_lock(&blocked_lock_lock);
 765         __locks_wake_up_blocks(blocker);
 766         spin_unlock(&blocked_lock_lock);
 767 }
 768
 769 static void
 770 locks_insert_lock_ctx(struct file_lock *fl, struct list_head *before)
 771 {
 772         list_add_tail(&fl->fl_list, before);
 773         locks_insert_global_locks(fl);
 774 }
 775
 776 static void
 777 locks_unlink_lock_ctx(struct file_lock *fl)
 778 {
 779         locks_delete_global_locks(fl);
 780         list_del_init(&fl->fl_list);
 781         locks_wake_up_blocks(fl);
 782 }
 783
 784 static void
 785 locks_delete_lock_ctx(struct file_lock *fl, struct list_head *dispose)
 786 {
 787         locks_unlink_lock_ctx(fl);
 788         if (dispose)
 789                 list_add(&fl->fl_list, dispose);
 790         else
 791                 locks_free_lock(fl);
 792 }
 793
 794 /* Determine if lock sys_fl blocks lock caller_fl. Common functionality
 795  * checks for shared/exclusive status of overlapping locks.
 796  */
 797 static bool locks_conflict(struct file_lock *caller_fl,
 798                            struct file_lock *sys_fl)
 799 {
 800         if (sys_fl->fl_type == F_WRLCK)
 801                 return true;
 802         if (caller_fl->fl_type == F_WRLCK)
 803                 return true;
 804         return false;
 805 }
 806
 807 /* Determine if lock sys_fl blocks lock caller_fl. POSIX specific
 808  * checking before calling the locks_conflict().
 809  */
 810 static bool posix_locks_conflict(struct file_lock *caller_fl,
 811                                  struct file_lock *sys_fl)
 812 {
 813         /* POSIX locks owned by the same process do not conflict with
 814          * each other.
 815          */
 816         if (posix_same_owner(caller_fl, sys_fl))
 817                 return false;
 818
 819         /* Check whether they overlap */
 820         if (!locks_overlap(caller_fl, sys_fl))
 821                 return false;
 822
 823         return locks_conflict(caller_fl, sys_fl);
 824 }
 825
 826 /* Determine if lock sys_fl blocks lock caller_fl. FLOCK specific
 827  * checking before calling the locks_conflict().
 828  */
 829 static bool flock_locks_conflict(struct file_lock *caller_fl,
 830                                  struct file_lock *sys_fl)
 831 {
 832         /* FLOCK locks referring to the same filp do not conflict with
 833          * each other.
 834          */
 835         if (caller_fl->fl_file == sys_fl->fl_file)
 836                 return false;
 837
 838         return locks_conflict(caller_fl, sys_fl);
 839 }
 840
 841 void
 842 posix_test_lock(struct file *filp, struct file_lock *fl)
 843 {
 844         struct file_lock *cfl;
 845         struct file_lock_context *ctx;
 846         struct inode *inode = locks_inode(filp);
 847
 848         ctx = smp_load_acquire(&inode->i_flctx);
 849         if (!ctx || list_empty_careful(&ctx->flc_posix)) {
 850                 fl->fl_type = F_UNLCK;
 851                 return;
 852         }
 853
 854         spin_lock(&ctx->flc_lock);
 855         list_for_each_entry(cfl, &ctx->flc_posix, fl_list) {
 856                 if (posix_locks_conflict(fl, cfl)) {
 857                         locks_copy_conflock(fl, cfl);
 858                         goto out;
 859                 }
 860         }
 861         fl->fl_type = F_UNLCK;
 862 out:
 863         spin_unlock(&ctx->flc_lock);
 864         return;
 865 }
 866 EXPORT_SYMBOL(posix_test_lock);
 867
 868 /*
 869  * Deadlock detection:
 870  *
 871  * We attempt to detect deadlocks that are due purely to posix file
 872  * locks.
 873  *
 874  * We assume that a task can be waiting for at most one lock at a time.
 875  * So for any acquired lock, the process holding that lock may be
 876  * waiting on at most one other lock.  That lock in turn may be held by
 877  * someone waiting for at most one other lock.  Given a requested lock
 878  * caller_fl which is about to wait for a conflicting lock block_fl, we
 879  * follow this chain of waiters to ensure we are not about to create a
 880  * cycle.
 881  *
 882  * Since we do this before we ever put a process to sleep on a lock, we
 883  * are ensured that there is never a cycle; that is what guarantees that
 884  * the while() loop in posix_locks_deadlock() eventually completes.
 885  *
 886  * Note: the above assumption may not be true when handling lock
 887  * requests from a broken NFS client. It may also fail in the presence
 888  * of tasks (such as posix threads) sharing the same open file table.
 889  * To handle those cases, we just bail out after a few iterations.
 890  *
 891  * For FL_OFDLCK locks, the owner is the filp, not the files_struct.
 892  * Because the owner is not even nominally tied to a thread of
 893  * execution, the deadlock detection below can't reasonably work well. Just
 894  * skip it for those.
 895  *
 896  * In principle, we could do a more limited deadlock detection on FL_OFDLCK
 897  * locks that just checks for the case where two tasks are attempting to
 898  * upgrade from read to write locks on the same inode.
 899  */
 900
 901 #define MAX_DEADLK_ITERATIONS 10
 902
 903 /* Find a lock that the owner of the given block_fl is blocking on. */
 904 static struct file_lock *what_owner_is_waiting_for(struct file_lock *block_fl)
 905 {
 906         struct file_lock *fl;
 907
 908         hash_for_each_possible(blocked_hash, fl, fl_link, posix_owner_key(block_fl)) {
 909                 if (posix_same_owner(fl, block_fl)) {
 910                         while (fl->fl_blocker)
 911                                 fl = fl->fl_blocker;
 912                         return fl;
 913                 }
 914         }
 915         return NULL;
 916 }
 917
 918 /* Must be called with the blocked_lock_lock held! */
 919 static int posix_locks_deadlock(struct file_lock *caller_fl,
 920                                 struct file_lock *block_fl)
 921 {
 922         int i = 0;
 923
 924         lockdep_assert_held(&blocked_lock_lock);
 925
 926         /*
 927          * This deadlock detector can't reasonably detect deadlocks with
 928          * FL_OFDLCK locks, since they aren't owned by a process, per-se.
 929          */
 930         if (IS_OFDLCK(caller_fl))
 931                 return 0;
 932
 933         while ((block_fl = what_owner_is_waiting_for(block_fl))) {
 934                 if (i++ > MAX_DEADLK_ITERATIONS)
 935                         return 0;
 936                 if (posix_same_owner(caller_fl, block_fl))
 937                         return 1;
 938         }
 939         return 0;
 940 }
 941
 942 /* Try to create a FLOCK lock on filp. We always insert new FLOCK locks
 943  * after any leases, but before any posix locks.
 944  *
 945  * Note that if called with an FL_EXISTS argument, the caller may determine
 946  * whether or not a lock was successfully freed by testing the return
 947  * value for -ENOENT.
 948  */
 949 static int flock_lock_inode(struct inode *inode, struct file_lock *request)
 950 {
 951         struct file_lock *new_fl = NULL;
 952         struct file_lock *fl;
 953         struct file_lock_context *ctx;
 954         int error = 0;
 955         bool found = false;
 956         LIST_HEAD(dispose);
 957
 958         ctx = locks_get_lock_context(inode, request->fl_type);
 959         if (!ctx) {
 960                 if (request->fl_type != F_UNLCK)
 961                         return -ENOMEM;
 962                 return (request->fl_flags & FL_EXISTS) ? -ENOENT : 0;
 963         }
 964
 965         if (!(request->fl_flags & FL_ACCESS) && (request->fl_type != F_UNLCK)) {
 966                 new_fl = locks_alloc_lock();
 967                 if (!new_fl)
 968                         return -ENOMEM;
 969         }
 970
 971         percpu_down_read(&file_rwsem);
 972         spin_lock(&ctx->flc_lock);
 973         if (request->fl_flags & FL_ACCESS)
 974                 goto find_conflict;
 975
 976         list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
 977                 if (request->fl_file != fl->fl_file)
 978                         continue;
 979                 if (request->fl_type == fl->fl_type)
 980                         goto out;
 981                 found = true;
 982                 locks_delete_lock_ctx(fl, &dispose);
 983                 break;
 984         }
 985
 986         if (request->fl_type == F_UNLCK) {
 987                 if ((request->fl_flags & FL_EXISTS) && !found)
 988                         error = -ENOENT;
 989                 goto out;
 990         }
 991
 992 find_conflict:
 993         list_for_each_entry(fl, &ctx->flc_flock, fl_list) {
 994                 if (!flock_locks_conflict(request, fl))
 995                         continue;
 996                 error = -EAGAIN;
 997                 if (!(request->fl_flags & FL_SLEEP))
 998                         goto out;
 999                 error = FILE_LOCK_DEFERRED;
1000                 locks_insert_block(fl, request, flock_locks_conflict);
1001                 goto out;
1002         }
1003         if (request->fl_flags & FL_ACCESS)
1004                 goto out;
1005         locks_copy_lock(new_fl, request);
1006         locks_move_blocks(new_fl, request);
1007         locks_insert_lock_ctx(new_fl, &ctx->flc_flock);
1008         new_fl = NULL;
1009         error = 0;
1010
1011 out:
1012         spin_unlock(&ctx->flc_lock);
1013         percpu_up_read(&file_rwsem);
1014         if (new_fl)
1015                 locks_free_lock(new_fl);
1016         locks_dispose_list(&dispose);
1017         trace_flock_lock_inode(inode, request, error);
1018         return error;
1019 }
1020
1021 static int posix_lock_inode(struct inode *inode, struct file_lock *request,
1022                             struct file_lock *conflock)
1023 {
1024         struct file_lock *fl, *tmp;
1025         struct file_lock *new_fl = NULL;
1026         struct file_lock *new_fl2 = NULL;
1027         struct file_lock *left = NULL;
1028         struct file_lock *right = NULL;
1029         struct file_lock_context *ctx;
1030         int error;
1031         bool added = false;
1032         LIST_HEAD(dispose);
1033
1034         ctx = locks_get_lock_context(inode, request->fl_type);
1035         if (!ctx)
1036                 return (request->fl_type == F_UNLCK) ? 0 : -ENOMEM;
1037
1038         /*
1039          * We may need two file_lock structures for this operation,
1040          * so we get them in advance to avoid races.
1041          *
1042          * In some cases we can be sure, that no new locks will be needed
1043          */
1044         if (!(request->fl_flags & FL_ACCESS) &&
1045             (request->fl_type != F_UNLCK ||
1046              request->fl_start != 0 || request->fl_end != OFFSET_MAX)) {
1047                 new_fl = locks_alloc_lock();
1048                 new_fl2 = locks_alloc_lock();
1049         }
1050
1051         percpu_down_read(&file_rwsem);
1052         spin_lock(&ctx->flc_lock);
1053         /*
1054          * New lock request. Walk all POSIX locks and look for conflicts. If
1055          * there are any, either return error or put the request on the
1056          * blocker's list of waiters and the global blocked_hash.
1057          */
1058         if (request->fl_type != F_UNLCK) {
1059                 list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1060                         if (!posix_locks_conflict(request, fl))
1061                                 continue;
1062                         if (conflock)
1063                                 locks_copy_conflock(conflock, fl);
1064                         error = -EAGAIN;
1065                         if (!(request->fl_flags & FL_SLEEP))
1066                                 goto out;
1067                         /*
1068                          * Deadlock detection and insertion into the blocked
1069                          * locks list must be done while holding the same lock!
1070                          */
1071                         error = -EDEADLK;
1072                         spin_lock(&blocked_lock_lock);
1073                         /*
1074                          * Ensure that we don't find any locks blocked on this
1075                          * request during deadlock detection.
1076                          */
1077                         __locks_wake_up_blocks(request);
1078                         if (likely(!posix_locks_deadlock(request, fl))) {
1079                                 error = FILE_LOCK_DEFERRED;
1080                                 __locks_insert_block(fl, request,
1081                                                      posix_locks_conflict);
1082                         }
1083                         spin_unlock(&blocked_lock_lock);
1084                         goto out;
1085                 }
1086         }
1087
1088         /* If we're just looking for a conflict, we're done. */
1089         error = 0;
1090         if (request->fl_flags & FL_ACCESS)
1091                 goto out;
1092
1093         /* Find the first old lock with the same owner as the new lock */
1094         list_for_each_entry(fl, &ctx->flc_posix, fl_list) {
1095                 if (posix_same_owner(request, fl))
1096                         break;
1097         }
1098
1099         /* Process locks with this owner. */
1100         list_for_each_entry_safe_from(fl, tmp, &ctx->flc_posix, fl_list) {
1101                 if (!posix_same_owner(request, fl))
1102                         break;
1103
1104                 /* Detect adjacent or overlapping regions (if same lock type) */
1105                 if (request->fl_type == fl->fl_type) {
1106                         /* In all comparisons of start vs end, use
1107                          * "start - 1" rather than "end + 1". If end
1108                          * is OFFSET_MAX, end + 1 will become negative.
1109                          */
1110                         if (fl->fl_end < request->fl_start - 1)
1111                                 continue;
1112                         /* If the next lock in the list has entirely bigger
1113                          * addresses than the new one, insert the lock here.
1114                          */
1115                         if (fl->fl_start - 1 > request->fl_end)
1116                                 break;
1117
1118                         /* If we come here, the new and old lock are of the
1119                          * same type and adjacent or overlapping. Make one
1120                          * lock yielding from the lower start address of both
1121                          * locks to the higher end address.
1122                          */
1123                         if (fl->fl_start > request->fl_start)
1124                                 fl->fl_start = request->fl_start;
1125                         else
1126                                 request->fl_start = fl->fl_start;
1127                         if (fl->fl_end < request->fl_end)
1128                                 fl->fl_end = request->fl_end;
1129                         else
1130                                 request->fl_end = fl->fl_end;
1131                         if (added) {
1132                                 locks_delete_lock_ctx(fl, &dispose);
1133                                 continue;
1134                         }
1135                         request = fl;
1136                         added = true;
1137                 } else {
1138                         /* Processing for different lock types is a bit
1139                          * more complex.
1140                          */
1141                         if (fl->fl_end < request->fl_start)
1142                                 continue;
1143                         if (fl->fl_start > request->fl_end)
1144                                 break;
1145                         if (request->fl_type == F_UNLCK)
1146                                 added = true;
1147                         if (fl->fl_start < request->fl_start)
1148                                 left = fl;
1149                         /* If the next lock in the list has a higher end
1150                          * address than the new one, insert the new one here.
1151                          */
1152                         if (fl->fl_end > request->fl_end) {
1153                                 right = fl;
1154                                 break;
1155                         }
1156                         if (fl->fl_start >= request->fl_start) {
1157                                 /* The new lock completely replaces an old
1158                                  * one (This may happen several times).
1159                                  */
1160                                 if (added) {
1161                                         locks_delete_lock_ctx(fl, &dispose);
1162                                         continue;
1163                                 }
1164                                 /*
1165                                  * Replace the old lock with new_fl, and
1166                                  * remove the old one. It's safe to do the
1167                                  * insert here since we know that we won't be
1168                                  * using new_fl later, and that the lock is
1169                                  * just replacing an existing lock.
1170                                  */
1171                                 error = -ENOLCK;
1172                                 if (!new_fl)
1173                                         goto out;
1174                                 locks_copy_lock(new_fl, request);
1175                                 locks_move_blocks(new_fl, request);
1176                                 request = new_fl;
1177                                 new_fl = NULL;
1178                                 locks_insert_lock_ctx(request, &fl->fl_list);
1179                                 locks_delete_lock_ctx(fl, &dispose);
1180                                 added = true;
1181                         }
1182                 }
1183         }
1184
1185         /*
1186          * The above code only modifies existing locks in case of merging or
1187          * replacing. If new lock(s) need to be inserted all modifications are
1188          * done below this, so it's safe yet to bail out.
1189          */
1190         error = -ENOLCK; /* "no luck" */
1191         if (right && left == right && !new_fl2)
1192                 goto out;
1193
1194         error = 0;
1195         if (!added) {
1196                 if (request->fl_type == F_UNLCK) {
1197                         if (request->fl_flags & FL_EXISTS)
1198                                 error = -ENOENT;
1199                         goto out;
1200                 }
1201
1202                 if (!new_fl) {
1203                         error = -ENOLCK;
1204                         goto out;
1205                 }
1206                 locks_copy_lock(new_fl, request);
1207                 locks_move_blocks(new_fl, request);
1208                 locks_insert_lock_ctx(new_fl, &fl->fl_list);
1209                 fl = new_fl;
1210                 new_fl = NULL;
1211         }
1212         if (right) {
1213                 if (left == right) {
1214                         /* The new lock breaks the old one in two pieces,
1215                          * so we have to use the second new lock.
1216                          */
1217                         left = new_fl2;
1218                         new_fl2 = NULL;
1219                         locks_copy_lock(left, right);
1220                         locks_insert_lock_ctx(left, &fl->fl_list);
1221                 }
1222                 right->fl_start = request->fl_end + 1;
1223                 locks_wake_up_blocks(right);
1224         }
1225         if (left) {
1226                 left->fl_end = request->fl_start - 1;
1227                 locks_wake_up_blocks(left);
1228         }
1229  out:
1230         spin_unlock(&ctx->flc_lock);
1231         percpu_up_read(&file_rwsem);
1232         /*
1233          * Free any unused locks.
1234          */
1235         if (new_fl)
1236                 locks_free_lock(new_fl);
1237         if (new_fl2)
1238                 locks_free_lock(new_fl2);
1239         locks_dispose_list(&dispose);
1240         trace_posix_lock_inode(inode, request, error);
1241
1242         return error;
1243 }
1244
/**
 * posix_lock_file - Apply a POSIX-style lock to a file
 * @filp: The file to apply the lock to
 * @fl: The lock to be applied
 * @conflock: Place to return a copy of the conflicting lock, if found.
 *
 * Add a POSIX style lock to a file by forwarding the request to
 * posix_lock_inode() for the file's inode.
 * We merge adjacent & overlapping locks whenever possible.
 * POSIX locks are sorted by owner task, then by starting address
 *
 * Note that if called with an FL_EXISTS argument, the caller may determine
 * whether or not a lock was successfully freed by testing the return
 * value for -ENOENT.
 */
int posix_lock_file(struct file *filp, struct file_lock *fl,
                        struct file_lock *conflock)
{
        struct inode *inode = locks_inode(filp);

        return posix_lock_inode(inode, fl, conflock);
}
EXPORT_SYMBOL(posix_lock_file);
1265
1266 /**
1267  * posix_lock_inode_wait - Apply a POSIX-style lock to a file
1268  * @inode: inode of file to which lock request should be applied
1269  * @fl: The lock to be applied
1270  *
1271  * Apply a POSIX style lock request to an inode.
1272  */
1273 static int posix_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1274 {
1275         int error;
1276         might_sleep ();
1277         for (;;) {
1278                 error = posix_lock_inode(inode, fl, NULL);
1279                 if (error != FILE_LOCK_DEFERRED)
1280                         break;
1281                 error = wait_event_interruptible(fl->fl_wait,
1282                                         list_empty(&fl->fl_blocked_member));
1283                 if (error)
1284                         break;
1285         }
1286         locks_delete_block(fl);
1287         return error;
1288 }
1289
1290 static void lease_clear_pending(struct file_lock *fl, int arg)
1291 {
1292         switch (arg) {
1293         case F_UNLCK:
1294                 fl->fl_flags &= ~FL_UNLOCK_PENDING;
1295                 fallthrough;
1296         case F_RDLCK:
1297                 fl->fl_flags &= ~FL_DOWNGRADE_PENDING;
1298         }
1299 }
1300
1301 /* We already had a lease on this file; just change its type */
1302 int lease_modify(struct file_lock *fl, int arg, struct list_head *dispose)
1303 {
1304         int error = assign_type(fl, arg);
1305
1306         if (error)
1307                 return error;
1308         lease_clear_pending(fl, arg);
1309         locks_wake_up_blocks(fl);
1310         if (arg == F_UNLCK) {
1311                 struct file *filp = fl->fl_file;
1312
1313                 f_delown(filp);
1314                 filp->f_owner.signum = 0;
1315                 fasync_helper(0, fl->fl_file, 0, &fl->fl_fasync);
1316                 if (fl->fl_fasync != NULL) {
1317                         printk(KERN_ERR "locks_delete_lock: fasync == %p\n", fl->fl_fasync);
1318                         fl->fl_fasync = NULL;
1319                 }
1320                 locks_delete_lock_ctx(fl, dispose);
1321         }
1322         return 0;
1323 }
1324 EXPORT_SYMBOL(lease_modify);
1325
1326 static bool past_time(unsigned long then)
1327 {
1328         if (!then)
1329                 /* 0 is a special value meaning "this never expires": */
1330                 return false;
1331         return time_after(jiffies, then);
1332 }
1333
1334 static void time_out_leases(struct inode *inode, struct list_head *dispose)
1335 {
1336         struct file_lock_context *ctx = inode->i_flctx;
1337         struct file_lock *fl, *tmp;
1338
1339         lockdep_assert_held(&ctx->flc_lock);
1340
1341         list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1342                 trace_time_out_leases(inode, fl);
1343                 if (past_time(fl->fl_downgrade_time))
1344                         lease_modify(fl, F_RDLCK, dispose);
1345                 if (past_time(fl->fl_break_time))
1346                         lease_modify(fl, F_UNLCK, dispose);
1347         }
1348 }
1349
1350 static bool leases_conflict(struct file_lock *lease, struct file_lock *breaker)
1351 {
1352         bool rc;
1353
1354         if (lease->fl_lmops->lm_breaker_owns_lease
1355                         && lease->fl_lmops->lm_breaker_owns_lease(lease))
1356                 return false;
1357         if ((breaker->fl_flags & FL_LAYOUT) != (lease->fl_flags & FL_LAYOUT)) {
1358                 rc = false;
1359                 goto trace;
1360         }
1361         if ((breaker->fl_flags & FL_DELEG) && (lease->fl_flags & FL_LEASE)) {
1362                 rc = false;
1363                 goto trace;
1364         }
1365
1366         rc = locks_conflict(breaker, lease);
1367 trace:
1368         trace_leases_conflict(rc, lease, breaker);
1369         return rc;
1370 }
1371
1372 static bool
1373 any_leases_conflict(struct inode *inode, struct file_lock *breaker)
1374 {
1375         struct file_lock_context *ctx = inode->i_flctx;
1376         struct file_lock *fl;
1377
1378         lockdep_assert_held(&ctx->flc_lock);
1379
1380         list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1381                 if (leases_conflict(fl, breaker))
1382                         return true;
1383         }
1384         return false;
1385 }
1386
1387 /**
1388  *      __break_lease   -       revoke all outstanding leases on file
1389  *      @inode: the inode of the file to return
1390  *      @mode: O_RDONLY: break only write leases; O_WRONLY or O_RDWR:
1391  *          break all leases
1392  *      @type: FL_LEASE: break leases and delegations; FL_DELEG: break
1393  *          only delegations
1394  *
1395  *      break_lease (inlined for speed) has checked there already is at least
1396  *      some kind of lock (maybe a lease) on this file.  Leases are broken on
1397  *      a call to open() or truncate().  This function can sleep unless you
1398  *      specified %O_NONBLOCK to your open().
1399  */
1400 int __break_lease(struct inode *inode, unsigned int mode, unsigned int type)
1401 {
1402         int error = 0;
1403         struct file_lock_context *ctx;
1404         struct file_lock *new_fl, *fl, *tmp;
1405         unsigned long break_time;
1406         int want_write = (mode & O_ACCMODE) != O_RDONLY;
1407         LIST_HEAD(dispose);
1408
1409         new_fl = lease_alloc(NULL, want_write ? F_WRLCK : F_RDLCK);
1410         if (IS_ERR(new_fl))
1411                 return PTR_ERR(new_fl);
1412         new_fl->fl_flags = type;
1413
1414         /* typically we will check that ctx is non-NULL before calling */
1415         ctx = smp_load_acquire(&inode->i_flctx);
1416         if (!ctx) {
1417                 WARN_ON_ONCE(1);
1418                 goto free_lock;
1419         }
1420
1421         percpu_down_read(&file_rwsem);
1422         spin_lock(&ctx->flc_lock);
1423
1424         time_out_leases(inode, &dispose);
1425
1426         if (!any_leases_conflict(inode, new_fl))
1427                 goto out;
1428
1429         break_time = 0;
1430         if (lease_break_time > 0) {
1431                 break_time = jiffies + lease_break_time * HZ;
1432                 if (break_time == 0)
1433                         break_time++;   /* so that 0 means no break time */
1434         }
1435
1436         list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list) {
1437                 if (!leases_conflict(fl, new_fl))
1438                         continue;
1439                 if (want_write) {
1440                         if (fl->fl_flags & FL_UNLOCK_PENDING)
1441                                 continue;
1442                         fl->fl_flags |= FL_UNLOCK_PENDING;
1443                         fl->fl_break_time = break_time;
1444                 } else {
1445                         if (lease_breaking(fl))
1446                                 continue;
1447                         fl->fl_flags |= FL_DOWNGRADE_PENDING;
1448                         fl->fl_downgrade_time = break_time;
1449                 }
1450                 if (fl->fl_lmops->lm_break(fl))
1451                         locks_delete_lock_ctx(fl, &dispose);
1452         }
1453
1454         if (list_empty(&ctx->flc_lease))
1455                 goto out;
1456
1457         if (mode & O_NONBLOCK) {
1458                 trace_break_lease_noblock(inode, new_fl);
1459                 error = -EWOULDBLOCK;
1460                 goto out;
1461         }
1462
1463 restart:
1464         fl = list_first_entry(&ctx->flc_lease, struct file_lock, fl_list);
1465         break_time = fl->fl_break_time;
1466         if (break_time != 0)
1467                 break_time -= jiffies;
1468         if (break_time == 0)
1469                 break_time++;
1470         locks_insert_block(fl, new_fl, leases_conflict);
1471         trace_break_lease_block(inode, new_fl);
1472         spin_unlock(&ctx->flc_lock);
1473         percpu_up_read(&file_rwsem);
1474
1475         locks_dispose_list(&dispose);
1476         error = wait_event_interruptible_timeout(new_fl->fl_wait,
1477                                         list_empty(&new_fl->fl_blocked_member),
1478                                         break_time);
1479
1480         percpu_down_read(&file_rwsem);
1481         spin_lock(&ctx->flc_lock);
1482         trace_break_lease_unblock(inode, new_fl);
1483         locks_delete_block(new_fl);
1484         if (error >= 0) {
1485                 /*
1486                  * Wait for the next conflicting lease that has not been
1487                  * broken yet
1488                  */
1489                 if (error == 0)
1490                         time_out_leases(inode, &dispose);
1491                 if (any_leases_conflict(inode, new_fl))
1492                         goto restart;
1493                 error = 0;
1494         }
1495 out:
1496         spin_unlock(&ctx->flc_lock);
1497         percpu_up_read(&file_rwsem);
1498         locks_dispose_list(&dispose);
1499 free_lock:
1500         locks_free_lock(new_fl);
1501         return error;
1502 }
1503 EXPORT_SYMBOL(__break_lease);
1504
1505 /**
1506  *      lease_get_mtime - update modified time of an inode with exclusive lease
1507  *      @inode: the inode
1508  *      @time:  pointer to a timespec which contains the last modified time
1509  *
1510  * This is to force NFS clients to flush their caches for files with
1511  * exclusive leases.  The justification is that if someone has an
1512  * exclusive lease, then they could be modifying it.
1513  */
1514 void lease_get_mtime(struct inode *inode, struct timespec64 *time)
1515 {
1516         bool has_lease = false;
1517         struct file_lock_context *ctx;
1518         struct file_lock *fl;
1519
1520         ctx = smp_load_acquire(&inode->i_flctx);
1521         if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1522                 spin_lock(&ctx->flc_lock);
1523                 fl = list_first_entry_or_null(&ctx->flc_lease,
1524                                               struct file_lock, fl_list);
1525                 if (fl && (fl->fl_type == F_WRLCK))
1526                         has_lease = true;
1527                 spin_unlock(&ctx->flc_lock);
1528         }
1529
1530         if (has_lease)
1531                 *time = current_time(inode);
1532 }
1533 EXPORT_SYMBOL(lease_get_mtime);
1534
1535 /**
1536  *      fcntl_getlease - Enquire what lease is currently active
1537  *      @filp: the file
1538  *
1539  *      The value returned by this function will be one of
1540  *      (if no lease break is pending):
1541  *
1542  *      %F_RDLCK to indicate a shared lease is held.
1543  *
1544  *      %F_WRLCK to indicate an exclusive lease is held.
1545  *
1546  *      %F_UNLCK to indicate no lease is held.
1547  *
1548  *      (if a lease break is pending):
1549  *
1550  *      %F_RDLCK to indicate an exclusive lease needs to be
1551  *              changed to a shared lease (or removed).
1552  *
1553  *      %F_UNLCK to indicate the lease needs to be removed.
1554  *
1555  *      XXX: sfr & willy disagree over whether F_INPROGRESS
1556  *      should be returned to userspace.
1557  */
1558 int fcntl_getlease(struct file *filp)
1559 {
1560         struct file_lock *fl;
1561         struct inode *inode = locks_inode(filp);
1562         struct file_lock_context *ctx;
1563         int type = F_UNLCK;
1564         LIST_HEAD(dispose);
1565
1566         ctx = smp_load_acquire(&inode->i_flctx);
1567         if (ctx && !list_empty_careful(&ctx->flc_lease)) {
1568                 percpu_down_read(&file_rwsem);
1569                 spin_lock(&ctx->flc_lock);
1570                 time_out_leases(inode, &dispose);
1571                 list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1572                         if (fl->fl_file != filp)
1573                                 continue;
1574                         type = target_leasetype(fl);
1575                         break;
1576                 }
1577                 spin_unlock(&ctx->flc_lock);
1578                 percpu_up_read(&file_rwsem);
1579
1580                 locks_dispose_list(&dispose);
1581         }
1582         return type;
1583 }
1584
1585 /**
1586  * check_conflicting_open - see if the given file points to an inode that has
1587  *                          an existing open that would conflict with the
1588  *                          desired lease.
1589  * @filp:       file to check
1590  * @arg:        type of lease that we're trying to acquire
1591  * @flags:      current lock flags
1592  *
1593  * Check to see if there's an existing open fd on this file that would
1594  * conflict with the lease we're trying to set.
1595  */
1596 static int
1597 check_conflicting_open(struct file *filp, const long arg, int flags)
1598 {
1599         struct inode *inode = locks_inode(filp);
1600         int self_wcount = 0, self_rcount = 0;
1601
1602         if (flags & FL_LAYOUT)
1603                 return 0;
1604         if (flags & FL_DELEG)
1605                 /* We leave these checks to the caller */
1606                 return 0;
1607
1608         if (arg == F_RDLCK)
1609                 return inode_is_open_for_write(inode) ? -EAGAIN : 0;
1610         else if (arg != F_WRLCK)
1611                 return 0;
1612
1613         /*
1614          * Make sure that only read/write count is from lease requestor.
1615          * Note that this will result in denying write leases when i_writecount
1616          * is negative, which is what we want.  (We shouldn't grant write leases
1617          * on files open for execution.)
1618          */
1619         if (filp->f_mode & FMODE_WRITE)
1620                 self_wcount = 1;
1621         else if (filp->f_mode & FMODE_READ)
1622                 self_rcount = 1;
1623
1624         if (atomic_read(&inode->i_writecount) != self_wcount ||
1625             atomic_read(&inode->i_readcount) != self_rcount)
1626                 return -EAGAIN;
1627
1628         return 0;
1629 }
1630
1631 static int
1632 generic_add_lease(struct file *filp, long arg, struct file_lock **flp, void **priv)
1633 {
1634         struct file_lock *fl, *my_fl = NULL, *lease;
1635         struct inode *inode = locks_inode(filp);
1636         struct file_lock_context *ctx;
1637         bool is_deleg = (*flp)->fl_flags & FL_DELEG;
1638         int error;
1639         LIST_HEAD(dispose);
1640
1641         lease = *flp;
1642         trace_generic_add_lease(inode, lease);
1643
1644         /* Note that arg is never F_UNLCK here */
1645         ctx = locks_get_lock_context(inode, arg);
1646         if (!ctx)
1647                 return -ENOMEM;
1648
1649         /*
1650          * In the delegation case we need mutual exclusion with
1651          * a number of operations that take the i_mutex.  We trylock
1652          * because delegations are an optional optimization, and if
1653          * there's some chance of a conflict--we'd rather not
1654          * bother, maybe that's a sign this just isn't a good file to
1655          * hand out a delegation on.
1656          */
1657         if (is_deleg && !inode_trylock(inode))
1658                 return -EAGAIN;
1659
1660         if (is_deleg && arg == F_WRLCK) {
1661                 /* Write delegations are not currently supported: */
1662                 inode_unlock(inode);
1663                 WARN_ON_ONCE(1);
1664                 return -EINVAL;
1665         }
1666
1667         percpu_down_read(&file_rwsem);
1668         spin_lock(&ctx->flc_lock);
1669         time_out_leases(inode, &dispose);
1670         error = check_conflicting_open(filp, arg, lease->fl_flags);
1671         if (error)
1672                 goto out;
1673
1674         /*
1675          * At this point, we know that if there is an exclusive
1676          * lease on this file, then we hold it on this filp
1677          * (otherwise our open of this file would have blocked).
1678          * And if we are trying to acquire an exclusive lease,
1679          * then the file is not open by anyone (including us)
1680          * except for this filp.
1681          */
1682         error = -EAGAIN;
1683         list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1684                 if (fl->fl_file == filp &&
1685                     fl->fl_owner == lease->fl_owner) {
1686                         my_fl = fl;
1687                         continue;
1688                 }
1689
1690                 /*
1691                  * No exclusive leases if someone else has a lease on
1692                  * this file:
1693                  */
1694                 if (arg == F_WRLCK)
1695                         goto out;
1696                 /*
1697                  * Modifying our existing lease is OK, but no getting a
1698                  * new lease if someone else is opening for write:
1699                  */
1700                 if (fl->fl_flags & FL_UNLOCK_PENDING)
1701                         goto out;
1702         }
1703
1704         if (my_fl != NULL) {
1705                 lease = my_fl;
1706                 error = lease->fl_lmops->lm_change(lease, arg, &dispose);
1707                 if (error)
1708                         goto out;
1709                 goto out_setup;
1710         }
1711
1712         error = -EINVAL;
1713         if (!leases_enable)
1714                 goto out;
1715
1716         locks_insert_lock_ctx(lease, &ctx->flc_lease);
1717         /*
1718          * The check in break_lease() is lockless. It's possible for another
1719          * open to race in after we did the earlier check for a conflicting
1720          * open but before the lease was inserted. Check again for a
1721          * conflicting open and cancel the lease if there is one.
1722          *
1723          * We also add a barrier here to ensure that the insertion of the lock
1724          * precedes these checks.
1725          */
1726         smp_mb();
1727         error = check_conflicting_open(filp, arg, lease->fl_flags);
1728         if (error) {
1729                 locks_unlink_lock_ctx(lease);
1730                 goto out;
1731         }
1732
1733 out_setup:
1734         if (lease->fl_lmops->lm_setup)
1735                 lease->fl_lmops->lm_setup(lease, priv);
1736 out:
1737         spin_unlock(&ctx->flc_lock);
1738         percpu_up_read(&file_rwsem);
1739         locks_dispose_list(&dispose);
1740         if (is_deleg)
1741                 inode_unlock(inode);
1742         if (!error && !my_fl)
1743                 *flp = NULL;
1744         return error;
1745 }
1746
1747 static int generic_delete_lease(struct file *filp, void *owner)
1748 {
1749         int error = -EAGAIN;
1750         struct file_lock *fl, *victim = NULL;
1751         struct inode *inode = locks_inode(filp);
1752         struct file_lock_context *ctx;
1753         LIST_HEAD(dispose);
1754
1755         ctx = smp_load_acquire(&inode->i_flctx);
1756         if (!ctx) {
1757                 trace_generic_delete_lease(inode, NULL);
1758                 return error;
1759         }
1760
1761         percpu_down_read(&file_rwsem);
1762         spin_lock(&ctx->flc_lock);
1763         list_for_each_entry(fl, &ctx->flc_lease, fl_list) {
1764                 if (fl->fl_file == filp &&
1765                     fl->fl_owner == owner) {
1766                         victim = fl;
1767                         break;
1768                 }
1769         }
1770         trace_generic_delete_lease(inode, victim);
1771         if (victim)
1772                 error = fl->fl_lmops->lm_change(victim, F_UNLCK, &dispose);
1773         spin_unlock(&ctx->flc_lock);
1774         percpu_up_read(&file_rwsem);
1775         locks_dispose_list(&dispose);
1776         return error;
1777 }
1778
1779 /**
1780  *      generic_setlease        -       sets a lease on an open file
1781  *      @filp:  file pointer
1782  *      @arg:   type of lease to obtain
1783  *      @flp:   input - file_lock to use, output - file_lock inserted
1784  *      @priv:  private data for lm_setup (may be NULL if lm_setup
1785  *              doesn't require it)
1786  *
1787  *      The (input) flp->fl_lmops->lm_break function is required
1788  *      by break_lease().
1789  */
1790 int generic_setlease(struct file *filp, long arg, struct file_lock **flp,
1791                         void **priv)
1792 {
1793         struct inode *inode = locks_inode(filp);
1794         int error;
1795
1796         if ((!uid_eq(current_fsuid(), inode->i_uid)) && !capable(CAP_LEASE))
1797                 return -EACCES;
1798         if (!S_ISREG(inode->i_mode))
1799                 return -EINVAL;
1800         error = security_file_lock(filp, arg);
1801         if (error)
1802                 return error;
1803
1804         switch (arg) {
1805         case F_UNLCK:
1806                 return generic_delete_lease(filp, *priv);
1807         case F_RDLCK:
1808         case F_WRLCK:
1809                 if (!(*flp)->fl_lmops->lm_break) {
1810                         WARN_ON_ONCE(1);
1811                         return -ENOLCK;
1812                 }
1813
1814                 return generic_add_lease(filp, arg, flp, priv);
1815         default:
1816                 return -EINVAL;
1817         }
1818 }
1819 EXPORT_SYMBOL(generic_setlease);
1820
1821 #if IS_ENABLED(CONFIG_SRCU)
1822 /*
1823  * Kernel subsystems can register to be notified on any attempt to set
1824  * a new lease with the lease_notifier_chain. This is used by (e.g.) nfsd
1825  * to close files that it may have cached when there is an attempt to set a
1826  * conflicting lease.
1827  */
1828 static struct srcu_notifier_head lease_notifier_chain;
1829
1830 static inline void
1831 lease_notifier_chain_init(void)
1832 {
1833         srcu_init_notifier_head(&lease_notifier_chain);
1834 }
1835
1836 static inline void
1837 setlease_notifier(long arg, struct file_lock *lease)
1838 {
1839         if (arg != F_UNLCK)
1840                 srcu_notifier_call_chain(&lease_notifier_chain, arg, lease);
1841 }
1842
1843 int lease_register_notifier(struct notifier_block *nb)
1844 {
1845         return srcu_notifier_chain_register(&lease_notifier_chain, nb);
1846 }
1847 EXPORT_SYMBOL_GPL(lease_register_notifier);
1848
1849 void lease_unregister_notifier(struct notifier_block *nb)
1850 {
1851         srcu_notifier_chain_unregister(&lease_notifier_chain, nb);
1852 }
1853 EXPORT_SYMBOL_GPL(lease_unregister_notifier);
1854
1855 #else /* !IS_ENABLED(CONFIG_SRCU) */
1856 static inline void
1857 lease_notifier_chain_init(void)
1858 {
1859 }
1860
1861 static inline void
1862 setlease_notifier(long arg, struct file_lock *lease)
1863 {
1864 }
1865
1866 int lease_register_notifier(struct notifier_block *nb)
1867 {
1868         return 0;
1869 }
1870 EXPORT_SYMBOL_GPL(lease_register_notifier);
1871
1872 void lease_unregister_notifier(struct notifier_block *nb)
1873 {
1874 }
1875 EXPORT_SYMBOL_GPL(lease_unregister_notifier);
1876
1877 #endif /* IS_ENABLED(CONFIG_SRCU) */
1878
1879 /**
1880  * vfs_setlease        -       sets a lease on an open file
1881  * @filp:       file pointer
1882  * @arg:        type of lease to obtain
1883  * @lease:      file_lock to use when adding a lease
1884  * @priv:       private info for lm_setup when adding a lease (may be
1885  *              NULL if lm_setup doesn't require it)
1886  *
1887  * Call this to establish a lease on the file. The "lease" argument is not
1888  * used for F_UNLCK requests and may be NULL. For commands that set or alter
1889  * an existing lease, the ``(*lease)->fl_lmops->lm_break`` operation must be
1890  * set; if not, this function will return -ENOLCK (and generate a scary-looking
1891  * stack trace).
1892  *
1893  * The "priv" pointer is passed directly to the lm_setup function as-is. It
1894  * may be NULL if the lm_setup operation doesn't require it.
1895  */
1896 int
1897 vfs_setlease(struct file *filp, long arg, struct file_lock **lease, void **priv)
1898 {
1899         if (lease)
1900                 setlease_notifier(arg, *lease);
1901         if (filp->f_op->setlease)
1902                 return filp->f_op->setlease(filp, arg, lease, priv);
1903         else
1904                 return generic_setlease(filp, arg, lease, priv);
1905 }
1906 EXPORT_SYMBOL_GPL(vfs_setlease);
1907
1908 static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg)
1909 {
1910         struct file_lock *fl;
1911         struct fasync_struct *new;
1912         int error;
1913
1914         fl = lease_alloc(filp, arg);
1915         if (IS_ERR(fl))
1916                 return PTR_ERR(fl);
1917
1918         new = fasync_alloc();
1919         if (!new) {
1920                 locks_free_lock(fl);
1921                 return -ENOMEM;
1922         }
1923         new->fa_fd = fd;
1924
1925         error = vfs_setlease(filp, arg, &fl, (void **)&new);
1926         if (fl)
1927                 locks_free_lock(fl);
1928         if (new)
1929                 fasync_free(new);
1930         return error;
1931 }
1932
1933 /**
1934  *      fcntl_setlease  -       sets a lease on an open file
1935  *      @fd: open file descriptor
1936  *      @filp: file pointer
1937  *      @arg: type of lease to obtain
1938  *
1939  *      Call this fcntl to establish a lease on the file.
1940  *      Note that you also need to call %F_SETSIG to
1941  *      receive a signal when the lease is broken.
1942  */
1943 int fcntl_setlease(unsigned int fd, struct file *filp, long arg)
1944 {
1945         if (arg == F_UNLCK)
1946                 return vfs_setlease(filp, F_UNLCK, NULL, (void **)&filp);
1947         return do_fcntl_add_lease(fd, filp, arg);
1948 }
1949
1950 /**
1951  * flock_lock_inode_wait - Apply a FLOCK-style lock to a file
1952  * @inode: inode of the file to apply to
1953  * @fl: The lock to be applied
1954  *
1955  * Apply a FLOCK style lock request to an inode.
1956  */
1957 static int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1958 {
1959         int error;
1960         might_sleep();
1961         for (;;) {
1962                 error = flock_lock_inode(inode, fl);
1963                 if (error != FILE_LOCK_DEFERRED)
1964                         break;
1965                 error = wait_event_interruptible(fl->fl_wait,
1966                                 list_empty(&fl->fl_blocked_member));
1967                 if (error)
1968                         break;
1969         }
1970         locks_delete_block(fl);
1971         return error;
1972 }
1973
1974 /**
1975  * locks_lock_inode_wait - Apply a lock to an inode
1976  * @inode: inode of the file to apply to
1977  * @fl: The lock to be applied
1978  *
1979  * Apply a POSIX or FLOCK style lock request to an inode.
1980  */
1981 int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl)
1982 {
1983         int res = 0;
1984         switch (fl->fl_flags & (FL_POSIX|FL_FLOCK)) {
1985                 case FL_POSIX:
1986                         res = posix_lock_inode_wait(inode, fl);
1987                         break;
1988                 case FL_FLOCK:
1989                         res = flock_lock_inode_wait(inode, fl);
1990                         break;
1991                 default:
1992                         BUG();
1993         }
1994         return res;
1995 }
1996 EXPORT_SYMBOL(locks_lock_inode_wait);
1997
1998 /**
1999  *      sys_flock: - flock() system call.
2000  *      @fd: the file descriptor to lock.
2001  *      @cmd: the type of lock to apply.
2002  *
2003  *      Apply a %FL_FLOCK style lock to an open file descriptor.
2004  *      The @cmd can be one of:
2005  *
2006  *      - %LOCK_SH -- a shared lock.
2007  *      - %LOCK_EX -- an exclusive lock.
2008  *      - %LOCK_UN -- remove an existing lock.
2009  *      - %LOCK_MAND -- a 'mandatory' flock. (DEPRECATED)
2010  *
2011  *      %LOCK_MAND support has been removed from the kernel.
2012  */
2013 SYSCALL_DEFINE2(flock, unsigned int, fd, unsigned int, cmd)
2014 {
2015         struct fd f = fdget(fd);
2016         struct file_lock *lock;
2017         int can_sleep, unlock;
2018         int error;
2019
2020         error = -EBADF;
2021         if (!f.file)
2022                 goto out;
2023
2024         can_sleep = !(cmd & LOCK_NB);
2025         cmd &= ~LOCK_NB;
2026         unlock = (cmd == LOCK_UN);
2027
2028         if (!unlock && !(f.file->f_mode & (FMODE_READ|FMODE_WRITE)))
2029                 goto out_putf;
2030
2031         /*
2032          * LOCK_MAND locks were broken for a long time in that they never
2033          * conflicted with one another and didn't prevent any sort of open,
2034          * read or write activity.
2035          *
2036          * Just ignore these requests now, to preserve legacy behavior, but
2037          * throw a warning to let people know that they don't actually work.
2038          */
2039         if (cmd & LOCK_MAND) {
2040                 pr_warn_once("Attempt to set a LOCK_MAND lock via flock(2). This support has been removed and the request ignored.\n");
2041                 error = 0;
2042                 goto out_putf;
2043         }
2044
2045         lock = flock_make_lock(f.file, cmd, NULL);
2046         if (IS_ERR(lock)) {
2047                 error = PTR_ERR(lock);
2048                 goto out_putf;
2049         }
2050
2051         if (can_sleep)
2052                 lock->fl_flags |= FL_SLEEP;
2053
2054         error = security_file_lock(f.file, lock->fl_type);
2055         if (error)
2056                 goto out_free;
2057
2058         if (f.file->f_op->flock)
2059                 error = f.file->f_op->flock(f.file,
2060                                           (can_sleep) ? F_SETLKW : F_SETLK,
2061                                           lock);
2062         else
2063                 error = locks_lock_file_wait(f.file, lock);
2064
2065  out_free:
2066         locks_free_lock(lock);
2067
2068  out_putf:
2069         fdput(f);
2070  out:
2071         return error;
2072 }
2073
2074 /**
2075  * vfs_test_lock - test file byte range lock
2076  * @filp: The file to test lock for
2077  * @fl: The lock to test; also used to hold result
2078  *
2079  * Returns -ERRNO on failure.  Indicates presence of conflicting lock by
2080  * setting conf->fl_type to something other than F_UNLCK.
2081  */
2082 int vfs_test_lock(struct file *filp, struct file_lock *fl)
2083 {
2084         if (filp->f_op->lock)
2085                 return filp->f_op->lock(filp, F_GETLK, fl);
2086         posix_test_lock(filp, fl);
2087         return 0;
2088 }
2089 EXPORT_SYMBOL_GPL(vfs_test_lock);
2090
2091 /**
2092  * locks_translate_pid - translate a file_lock's fl_pid number into a namespace
2093  * @fl: The file_lock who's fl_pid should be translated
2094  * @ns: The namespace into which the pid should be translated
2095  *
2096  * Used to tranlate a fl_pid into a namespace virtual pid number
2097  */
2098 static pid_t locks_translate_pid(struct file_lock *fl, struct pid_namespace *ns)
2099 {
2100         pid_t vnr;
2101         struct pid *pid;
2102
2103         if (IS_OFDLCK(fl))
2104                 return -1;
2105         if (IS_REMOTELCK(fl))
2106                 return fl->fl_pid;
2107         /*
2108          * If the flock owner process is dead and its pid has been already
2109          * freed, the translation below won't work, but we still want to show
2110          * flock owner pid number in init pidns.
2111          */
2112         if (ns == &init_pid_ns)
2113                 return (pid_t)fl->fl_pid;
2114
2115         rcu_read_lock();
2116         pid = find_pid_ns(fl->fl_pid, &init_pid_ns);
2117         vnr = pid_nr_ns(pid, ns);
2118         rcu_read_unlock();
2119         return vnr;
2120 }
2121
2122 static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl)
2123 {
2124         flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2125 #if BITS_PER_LONG == 32
2126         /*
2127          * Make sure we can represent the posix lock via
2128          * legacy 32bit flock.
2129          */
2130         if (fl->fl_start > OFFT_OFFSET_MAX)
2131                 return -EOVERFLOW;
2132         if (fl->fl_end != OFFSET_MAX && fl->fl_end > OFFT_OFFSET_MAX)
2133                 return -EOVERFLOW;
2134 #endif
2135         flock->l_start = fl->fl_start;
2136         flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2137                 fl->fl_end - fl->fl_start + 1;
2138         flock->l_whence = 0;
2139         flock->l_type = fl->fl_type;
2140         return 0;
2141 }
2142
2143 #if BITS_PER_LONG == 32
2144 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl)
2145 {
2146         flock->l_pid = locks_translate_pid(fl, task_active_pid_ns(current));
2147         flock->l_start = fl->fl_start;
2148         flock->l_len = fl->fl_end == OFFSET_MAX ? 0 :
2149                 fl->fl_end - fl->fl_start + 1;
2150         flock->l_whence = 0;
2151         flock->l_type = fl->fl_type;
2152 }
2153 #endif
2154
2155 /* Report the first existing lock that would conflict with l.
2156  * This implements the F_GETLK command of fcntl().
2157  */
2158 int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock *flock)
2159 {
2160         struct file_lock *fl;
2161         int error;
2162
2163         fl = locks_alloc_lock();
2164         if (fl == NULL)
2165                 return -ENOMEM;
2166         error = -EINVAL;
2167         if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
2168                 goto out;
2169
2170         error = flock_to_posix_lock(filp, fl, flock);
2171         if (error)
2172                 goto out;
2173
2174         if (cmd == F_OFD_GETLK) {
2175                 error = -EINVAL;
2176                 if (flock->l_pid != 0)
2177                         goto out;
2178
2179                 fl->fl_flags |= FL_OFDLCK;
2180                 fl->fl_owner = filp;
2181         }
2182
2183         error = vfs_test_lock(filp, fl);
2184         if (error)
2185                 goto out;
2186
2187         flock->l_type = fl->fl_type;
2188         if (fl->fl_type != F_UNLCK) {
2189                 error = posix_lock_to_flock(flock, fl);
2190                 if (error)
2191                         goto out;
2192         }
2193 out:
2194         locks_free_lock(fl);
2195         return error;
2196 }
2197
2198 /**
2199  * vfs_lock_file - file byte range lock
2200  * @filp: The file to apply the lock to
2201  * @cmd: type of locking operation (F_SETLK, F_GETLK, etc.)
2202  * @fl: The lock to be applied
2203  * @conf: Place to return a copy of the conflicting lock, if found.
2204  *
2205  * A caller that doesn't care about the conflicting lock may pass NULL
2206  * as the final argument.
2207  *
2208  * If the filesystem defines a private ->lock() method, then @conf will
2209  * be left unchanged; so a caller that cares should initialize it to
2210  * some acceptable default.
2211  *
2212  * To avoid blocking kernel daemons, such as lockd, that need to acquire POSIX
2213  * locks, the ->lock() interface may return asynchronously, before the lock has
2214  * been granted or denied by the underlying filesystem, if (and only if)
2215  * lm_grant is set. Callers expecting ->lock() to return asynchronously
2216  * will only use F_SETLK, not F_SETLKW; they will set FL_SLEEP if (and only if)
2217  * the request is for a blocking lock. When ->lock() does return asynchronously,
2218  * it must return FILE_LOCK_DEFERRED, and call ->lm_grant() when the lock
2219  * request completes.
2220  * If the request is for non-blocking lock the file system should return
2221  * FILE_LOCK_DEFERRED then try to get the lock and call the callback routine
2222  * with the result. If the request timed out the callback routine will return a
2223  * nonzero return code and the file system should release the lock. The file
2224  * system is also responsible to keep a corresponding posix lock when it
2225  * grants a lock so the VFS can find out which locks are locally held and do
2226  * the correct lock cleanup when required.
2227  * The underlying filesystem must not drop the kernel lock or call
2228  * ->lm_grant() before returning to the caller with a FILE_LOCK_DEFERRED
2229  * return code.
2230  */
2231 int vfs_lock_file(struct file *filp, unsigned int cmd, struct file_lock *fl, struct file_lock *conf)
2232 {
2233         if (filp->f_op->lock)
2234                 return filp->f_op->lock(filp, cmd, fl);
2235         else
2236                 return posix_lock_file(filp, fl, conf);
2237 }
2238 EXPORT_SYMBOL_GPL(vfs_lock_file);
2239
2240 static int do_lock_file_wait(struct file *filp, unsigned int cmd,
2241                              struct file_lock *fl)
2242 {
2243         int error;
2244
2245         error = security_file_lock(filp, fl->fl_type);
2246         if (error)
2247                 return error;
2248
2249         for (;;) {
2250                 error = vfs_lock_file(filp, cmd, fl, NULL);
2251                 if (error != FILE_LOCK_DEFERRED)
2252                         break;
2253                 error = wait_event_interruptible(fl->fl_wait,
2254                                         list_empty(&fl->fl_blocked_member));
2255                 if (error)
2256                         break;
2257         }
2258         locks_delete_block(fl);
2259
2260         return error;
2261 }
2262
2263 /* Ensure that fl->fl_file has compatible f_mode for F_SETLK calls */
2264 static int
2265 check_fmode_for_setlk(struct file_lock *fl)
2266 {
2267         switch (fl->fl_type) {
2268         case F_RDLCK:
2269                 if (!(fl->fl_file->f_mode & FMODE_READ))
2270                         return -EBADF;
2271                 break;
2272         case F_WRLCK:
2273                 if (!(fl->fl_file->f_mode & FMODE_WRITE))
2274                         return -EBADF;
2275         }
2276         return 0;
2277 }
2278
2279 /* Apply the lock described by l to an open file descriptor.
2280  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2281  */
2282 int fcntl_setlk(unsigned int fd, struct file *filp, unsigned int cmd,
2283                 struct flock *flock)
2284 {
2285         struct file_lock *file_lock = locks_alloc_lock();
2286         struct inode *inode = locks_inode(filp);
2287         struct file *f;
2288         int error;
2289
2290         if (file_lock == NULL)
2291                 return -ENOLCK;
2292
2293         error = flock_to_posix_lock(filp, file_lock, flock);
2294         if (error)
2295                 goto out;
2296
2297         error = check_fmode_for_setlk(file_lock);
2298         if (error)
2299                 goto out;
2300
2301         /*
2302          * If the cmd is requesting file-private locks, then set the
2303          * FL_OFDLCK flag and override the owner.
2304          */
2305         switch (cmd) {
2306         case F_OFD_SETLK:
2307                 error = -EINVAL;
2308                 if (flock->l_pid != 0)
2309                         goto out;
2310
2311                 cmd = F_SETLK;
2312                 file_lock->fl_flags |= FL_OFDLCK;
2313                 file_lock->fl_owner = filp;
2314                 break;
2315         case F_OFD_SETLKW:
2316                 error = -EINVAL;
2317                 if (flock->l_pid != 0)
2318                         goto out;
2319
2320                 cmd = F_SETLKW;
2321                 file_lock->fl_flags |= FL_OFDLCK;
2322                 file_lock->fl_owner = filp;
2323                 fallthrough;
2324         case F_SETLKW:
2325                 file_lock->fl_flags |= FL_SLEEP;
2326         }
2327
2328         error = do_lock_file_wait(filp, cmd, file_lock);
2329
2330         /*
2331          * Attempt to detect a close/fcntl race and recover by releasing the
2332          * lock that was just acquired. There is no need to do that when we're
2333          * unlocking though, or for OFD locks.
2334          */
2335         if (!error && file_lock->fl_type != F_UNLCK &&
2336             !(file_lock->fl_flags & FL_OFDLCK)) {
2337                 struct files_struct *files = current->files;
2338                 /*
2339                  * We need that spin_lock here - it prevents reordering between
2340                  * update of i_flctx->flc_posix and check for it done in
2341                  * close(). rcu_read_lock() wouldn't do.
2342                  */
2343                 spin_lock(&files->file_lock);
2344                 f = files_lookup_fd_locked(files, fd);
2345                 spin_unlock(&files->file_lock);
2346                 if (f != filp) {
2347                         file_lock->fl_type = F_UNLCK;
2348                         error = do_lock_file_wait(filp, cmd, file_lock);
2349                         WARN_ON_ONCE(error);
2350                         error = -EBADF;
2351                 }
2352         }
2353 out:
2354         trace_fcntl_setlk(inode, file_lock, error);
2355         locks_free_lock(file_lock);
2356         return error;
2357 }
2358
2359 #if BITS_PER_LONG == 32
2360 /* Report the first existing lock that would conflict with l.
2361  * This implements the F_GETLK command of fcntl().
2362  */
2363 int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 *flock)
2364 {
2365         struct file_lock *fl;
2366         int error;
2367
2368         fl = locks_alloc_lock();
2369         if (fl == NULL)
2370                 return -ENOMEM;
2371
2372         error = -EINVAL;
2373         if (flock->l_type != F_RDLCK && flock->l_type != F_WRLCK)
2374                 goto out;
2375
2376         error = flock64_to_posix_lock(filp, fl, flock);
2377         if (error)
2378                 goto out;
2379
2380         if (cmd == F_OFD_GETLK) {
2381                 error = -EINVAL;
2382                 if (flock->l_pid != 0)
2383                         goto out;
2384
2385                 cmd = F_GETLK64;
2386                 fl->fl_flags |= FL_OFDLCK;
2387                 fl->fl_owner = filp;
2388         }
2389
2390         error = vfs_test_lock(filp, fl);
2391         if (error)
2392                 goto out;
2393
2394         flock->l_type = fl->fl_type;
2395         if (fl->fl_type != F_UNLCK)
2396                 posix_lock_to_flock64(flock, fl);
2397
2398 out:
2399         locks_free_lock(fl);
2400         return error;
2401 }
2402
2403 /* Apply the lock described by l to an open file descriptor.
2404  * This implements both the F_SETLK and F_SETLKW commands of fcntl().
2405  */
2406 int fcntl_setlk64(unsigned int fd, struct file *filp, unsigned int cmd,
2407                 struct flock64 *flock)
2408 {
2409         struct file_lock *file_lock = locks_alloc_lock();
2410         struct file *f;
2411         int error;
2412
2413         if (file_lock == NULL)
2414                 return -ENOLCK;
2415
2416         error = flock64_to_posix_lock(filp, file_lock, flock);
2417         if (error)
2418                 goto out;
2419
2420         error = check_fmode_for_setlk(file_lock);
2421         if (error)
2422                 goto out;
2423
2424         /*
2425          * If the cmd is requesting file-private locks, then set the
2426          * FL_OFDLCK flag and override the owner.
2427          */
2428         switch (cmd) {
2429         case F_OFD_SETLK:
2430                 error = -EINVAL;
2431                 if (flock->l_pid != 0)
2432                         goto out;
2433
2434                 cmd = F_SETLK64;
2435                 file_lock->fl_flags |= FL_OFDLCK;
2436                 file_lock->fl_owner = filp;
2437                 break;
2438         case F_OFD_SETLKW:
2439                 error = -EINVAL;
2440                 if (flock->l_pid != 0)
2441                         goto out;
2442
2443                 cmd = F_SETLKW64;
2444                 file_lock->fl_flags |= FL_OFDLCK;
2445                 file_lock->fl_owner = filp;
2446                 fallthrough;
2447         case F_SETLKW64:
2448                 file_lock->fl_flags |= FL_SLEEP;
2449         }
2450
2451         error = do_lock_file_wait(filp, cmd, file_lock);
2452
2453         /*
2454          * Attempt to detect a close/fcntl race and recover by releasing the
2455          * lock that was just acquired. There is no need to do that when we're
2456          * unlocking though, or for OFD locks.
2457          */
2458         if (!error && file_lock->fl_type != F_UNLCK &&
2459             !(file_lock->fl_flags & FL_OFDLCK)) {
2460                 struct files_struct *files = current->files;
2461                 /*
2462                  * We need that spin_lock here - it prevents reordering between
2463                  * update of i_flctx->flc_posix and check for it done in
2464                  * close(). rcu_read_lock() wouldn't do.
2465                  */
2466                 spin_lock(&files->file_lock);
2467                 f = files_lookup_fd_locked(files, fd);
2468                 spin_unlock(&files->file_lock);
2469                 if (f != filp) {
2470                         file_lock->fl_type = F_UNLCK;
2471                         error = do_lock_file_wait(filp, cmd, file_lock);
2472                         WARN_ON_ONCE(error);
2473                         error = -EBADF;
2474                 }
2475         }
2476 out:
2477         locks_free_lock(file_lock);
2478         return error;
2479 }
2480 #endif /* BITS_PER_LONG == 32 */
2481
2482 /*
2483  * This function is called when the file is being removed
2484  * from the task's fd array.  POSIX locks belonging to this task
2485  * are deleted at this time.
2486  */
2487 void locks_remove_posix(struct file *filp, fl_owner_t owner)
2488 {
2489         int error;
2490         struct inode *inode = locks_inode(filp);
2491         struct file_lock lock;
2492         struct file_lock_context *ctx;
2493
2494         /*
2495          * If there are no locks held on this file, we don't need to call
2496          * posix_lock_file().  Another process could be setting a lock on this
2497          * file at the same time, but we wouldn't remove that lock anyway.
2498          */
2499         ctx =  smp_load_acquire(&inode->i_flctx);
2500         if (!ctx || list_empty(&ctx->flc_posix))
2501                 return;
2502
2503         locks_init_lock(&lock);
2504         lock.fl_type = F_UNLCK;
2505         lock.fl_flags = FL_POSIX | FL_CLOSE;
2506         lock.fl_start = 0;
2507         lock.fl_end = OFFSET_MAX;
2508         lock.fl_owner = owner;
2509         lock.fl_pid = current->tgid;
2510         lock.fl_file = filp;
2511         lock.fl_ops = NULL;
2512         lock.fl_lmops = NULL;
2513
2514         error = vfs_lock_file(filp, F_SETLK, &lock, NULL);
2515
2516         if (lock.fl_ops && lock.fl_ops->fl_release_private)
2517                 lock.fl_ops->fl_release_private(&lock);
2518         trace_locks_remove_posix(inode, &lock, error);
2519 }
2520 EXPORT_SYMBOL(locks_remove_posix);
2521
2522 /* The i_flctx must be valid when calling into here */
2523 static void
2524 locks_remove_flock(struct file *filp, struct file_lock_context *flctx)
2525 {
2526         struct file_lock fl;
2527         struct inode *inode = locks_inode(filp);
2528
2529         if (list_empty(&flctx->flc_flock))
2530                 return;
2531
2532         flock_make_lock(filp, LOCK_UN, &fl);
2533         fl.fl_flags |= FL_CLOSE;
2534
2535         if (filp->f_op->flock)
2536                 filp->f_op->flock(filp, F_SETLKW, &fl);
2537         else
2538                 flock_lock_inode(inode, &fl);
2539
2540         if (fl.fl_ops && fl.fl_ops->fl_release_private)
2541                 fl.fl_ops->fl_release_private(&fl);
2542 }
2543
2544 /* The i_flctx must be valid when calling into here */
2545 static void
2546 locks_remove_lease(struct file *filp, struct file_lock_context *ctx)
2547 {
2548         struct file_lock *fl, *tmp;
2549         LIST_HEAD(dispose);
2550
2551         if (list_empty(&ctx->flc_lease))
2552                 return;
2553
2554         percpu_down_read(&file_rwsem);
2555         spin_lock(&ctx->flc_lock);
2556         list_for_each_entry_safe(fl, tmp, &ctx->flc_lease, fl_list)
2557                 if (filp == fl->fl_file)
2558                         lease_modify(fl, F_UNLCK, &dispose);
2559         spin_unlock(&ctx->flc_lock);
2560         percpu_up_read(&file_rwsem);
2561
2562         locks_dispose_list(&dispose);
2563 }
2564
2565 /*
2566  * This function is called on the last close of an open file.
2567  */
2568 void locks_remove_file(struct file *filp)
2569 {
2570         struct file_lock_context *ctx;
2571
2572         ctx = smp_load_acquire(&locks_inode(filp)->i_flctx);
2573         if (!ctx)
2574                 return;
2575
2576         /* remove any OFD locks */
2577         locks_remove_posix(filp, filp);
2578
2579         /* remove flock locks */
2580         locks_remove_flock(filp, ctx);
2581
2582         /* remove any leases */
2583         locks_remove_lease(filp, ctx);
2584
2585         spin_lock(&ctx->flc_lock);
2586         locks_check_ctx_file_list(filp, &ctx->flc_posix, "POSIX");
2587         locks_check_ctx_file_list(filp, &ctx->flc_flock, "FLOCK");
2588         locks_check_ctx_file_list(filp, &ctx->flc_lease, "LEASE");
2589         spin_unlock(&ctx->flc_lock);
2590 }
2591
2592 /**
2593  * vfs_cancel_lock - file byte range unblock lock
2594  * @filp: The file to apply the unblock to
2595  * @fl: The lock to be unblocked
2596  *
2597  * Used by lock managers to cancel blocked requests
2598  */
2599 int vfs_cancel_lock(struct file *filp, struct file_lock *fl)
2600 {
2601         if (filp->f_op->lock)
2602                 return filp->f_op->lock(filp, F_CANCELLK, fl);
2603         return 0;
2604 }
2605 EXPORT_SYMBOL_GPL(vfs_cancel_lock);
2606
2607 #ifdef CONFIG_PROC_FS
2608 #include <linux/proc_fs.h>
2609 #include <linux/seq_file.h>
2610
/*
 * Iteration state kept in seq_file->private while the "locks" proc file
 * is being read.
 */
struct locks_iterator {
	/* cursor handed to seq_hlist_start_percpu()/seq_hlist_next_percpu() */
	int	li_cpu;
	/* id printed for the current entry: *pos + 1 at start, bumped per step */
	loff_t	li_pos;
};
2615
2616 static void lock_get_status(struct seq_file *f, struct file_lock *fl,
2617                             loff_t id, char *pfx, int repeat)
2618 {
2619         struct inode *inode = NULL;
2620         unsigned int fl_pid;
2621         struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
2622         int type;
2623
2624         fl_pid = locks_translate_pid(fl, proc_pidns);
2625         /*
2626          * If lock owner is dead (and pid is freed) or not visible in current
2627          * pidns, zero is shown as a pid value. Check lock info from
2628          * init_pid_ns to get saved lock pid value.
2629          */
2630
2631         if (fl->fl_file != NULL)
2632                 inode = locks_inode(fl->fl_file);
2633
2634         seq_printf(f, "%lld: ", id);
2635
2636         if (repeat)
2637                 seq_printf(f, "%*s", repeat - 1 + (int)strlen(pfx), pfx);
2638
2639         if (IS_POSIX(fl)) {
2640                 if (fl->fl_flags & FL_ACCESS)
2641                         seq_puts(f, "ACCESS");
2642                 else if (IS_OFDLCK(fl))
2643                         seq_puts(f, "OFDLCK");
2644                 else
2645                         seq_puts(f, "POSIX ");
2646
2647                 seq_printf(f, " %s ",
2648                              (inode == NULL) ? "*NOINODE*" : "ADVISORY ");
2649         } else if (IS_FLOCK(fl)) {
2650                 seq_puts(f, "FLOCK  ADVISORY  ");
2651         } else if (IS_LEASE(fl)) {
2652                 if (fl->fl_flags & FL_DELEG)
2653                         seq_puts(f, "DELEG  ");
2654                 else
2655                         seq_puts(f, "LEASE  ");
2656
2657                 if (lease_breaking(fl))
2658                         seq_puts(f, "BREAKING  ");
2659                 else if (fl->fl_file)
2660                         seq_puts(f, "ACTIVE    ");
2661                 else
2662                         seq_puts(f, "BREAKER   ");
2663         } else {
2664                 seq_puts(f, "UNKNOWN UNKNOWN  ");
2665         }
2666         type = IS_LEASE(fl) ? target_leasetype(fl) : fl->fl_type;
2667
2668         seq_printf(f, "%s ", (type == F_WRLCK) ? "WRITE" :
2669                              (type == F_RDLCK) ? "READ" : "UNLCK");
2670         if (inode) {
2671                 /* userspace relies on this representation of dev_t */
2672                 seq_printf(f, "%d %02x:%02x:%lu ", fl_pid,
2673                                 MAJOR(inode->i_sb->s_dev),
2674                                 MINOR(inode->i_sb->s_dev), inode->i_ino);
2675         } else {
2676                 seq_printf(f, "%d <none>:0 ", fl_pid);
2677         }
2678         if (IS_POSIX(fl)) {
2679                 if (fl->fl_end == OFFSET_MAX)
2680                         seq_printf(f, "%Ld EOF\n", fl->fl_start);
2681                 else
2682                         seq_printf(f, "%Ld %Ld\n", fl->fl_start, fl->fl_end);
2683         } else {
2684                 seq_puts(f, "0 EOF\n");
2685         }
2686 }
2687
2688 static struct file_lock *get_next_blocked_member(struct file_lock *node)
2689 {
2690         struct file_lock *tmp;
2691
2692         /* NULL node or root node */
2693         if (node == NULL || node->fl_blocker == NULL)
2694                 return NULL;
2695
2696         /* Next member in the linked list could be itself */
2697         tmp = list_next_entry(node, fl_blocked_member);
2698         if (list_entry_is_head(tmp, &node->fl_blocker->fl_blocked_requests, fl_blocked_member)
2699                 || tmp == node) {
2700                 return NULL;
2701         }
2702
2703         return tmp;
2704 }
2705
2706 static int locks_show(struct seq_file *f, void *v)
2707 {
2708         struct locks_iterator *iter = f->private;
2709         struct file_lock *cur, *tmp;
2710         struct pid_namespace *proc_pidns = proc_pid_ns(file_inode(f->file)->i_sb);
2711         int level = 0;
2712
2713         cur = hlist_entry(v, struct file_lock, fl_link);
2714
2715         if (locks_translate_pid(cur, proc_pidns) == 0)
2716                 return 0;
2717
2718         /* View this crossed linked list as a binary tree, the first member of fl_blocked_requests
2719          * is the left child of current node, the next silibing in fl_blocked_member is the
2720          * right child, we can alse get the parent of current node from fl_blocker, so this
2721          * question becomes traversal of a binary tree
2722          */
2723         while (cur != NULL) {
2724                 if (level)
2725                         lock_get_status(f, cur, iter->li_pos, "-> ", level);
2726                 else
2727                         lock_get_status(f, cur, iter->li_pos, "", level);
2728
2729                 if (!list_empty(&cur->fl_blocked_requests)) {
2730                         /* Turn left */
2731                         cur = list_first_entry_or_null(&cur->fl_blocked_requests,
2732                                 struct file_lock, fl_blocked_member);
2733                         level++;
2734                 } else {
2735                         /* Turn right */
2736                         tmp = get_next_blocked_member(cur);
2737                         /* Fall back to parent node */
2738                         while (tmp == NULL && cur->fl_blocker != NULL) {
2739                                 cur = cur->fl_blocker;
2740                                 level--;
2741                                 tmp = get_next_blocked_member(cur);
2742                         }
2743                         cur = tmp;
2744                 }
2745         }
2746
2747         return 0;
2748 }
2749
2750 static void __show_fd_locks(struct seq_file *f,
2751                         struct list_head *head, int *id,
2752                         struct file *filp, struct files_struct *files)
2753 {
2754         struct file_lock *fl;
2755
2756         list_for_each_entry(fl, head, fl_list) {
2757
2758                 if (filp != fl->fl_file)
2759                         continue;
2760                 if (fl->fl_owner != files &&
2761                     fl->fl_owner != filp)
2762                         continue;
2763
2764                 (*id)++;
2765                 seq_puts(f, "lock:\t");
2766                 lock_get_status(f, fl, *id, "", 0);
2767         }
2768 }
2769
2770 void show_fd_locks(struct seq_file *f,
2771                   struct file *filp, struct files_struct *files)
2772 {
2773         struct inode *inode = locks_inode(filp);
2774         struct file_lock_context *ctx;
2775         int id = 0;
2776
2777         ctx = smp_load_acquire(&inode->i_flctx);
2778         if (!ctx)
2779                 return;
2780
2781         spin_lock(&ctx->flc_lock);
2782         __show_fd_locks(f, &ctx->flc_flock, &id, filp, files);
2783         __show_fd_locks(f, &ctx->flc_posix, &id, filp, files);
2784         __show_fd_locks(f, &ctx->flc_lease, &id, filp, files);
2785         spin_unlock(&ctx->flc_lock);
2786 }
2787
2788 static void *locks_start(struct seq_file *f, loff_t *pos)
2789         __acquires(&blocked_lock_lock)
2790 {
2791         struct locks_iterator *iter = f->private;
2792
2793         iter->li_pos = *pos + 1;
2794         percpu_down_write(&file_rwsem);
2795         spin_lock(&blocked_lock_lock);
2796         return seq_hlist_start_percpu(&file_lock_list.hlist, &iter->li_cpu, *pos);
2797 }
2798
2799 static void *locks_next(struct seq_file *f, void *v, loff_t *pos)
2800 {
2801         struct locks_iterator *iter = f->private;
2802
2803         ++iter->li_pos;
2804         return seq_hlist_next_percpu(v, &file_lock_list.hlist, &iter->li_cpu, pos);
2805 }
2806
2807 static void locks_stop(struct seq_file *f, void *v)
2808         __releases(&blocked_lock_lock)
2809 {
2810         spin_unlock(&blocked_lock_lock);
2811         percpu_up_write(&file_rwsem);
2812 }
2813
2814 static const struct seq_operations locks_seq_operations = {
2815         .start  = locks_start,
2816         .next   = locks_next,
2817         .stop   = locks_stop,
2818         .show   = locks_show,
2819 };
2820
2821 static int __init proc_locks_init(void)
2822 {
2823         proc_create_seq_private("locks", 0, NULL, &locks_seq_operations,
2824                         sizeof(struct locks_iterator), NULL);
2825         return 0;
2826 }
2827 fs_initcall(proc_locks_init);
2828 #endif
2829
2830 static int __init filelock_init(void)
2831 {
2832         int i;
2833
2834         flctx_cache = kmem_cache_create("file_lock_ctx",
2835                         sizeof(struct file_lock_context), 0, SLAB_PANIC, NULL);
2836
2837         filelock_cache = kmem_cache_create("file_lock_cache",
2838                         sizeof(struct file_lock), 0, SLAB_PANIC, NULL);
2839
2840         for_each_possible_cpu(i) {
2841                 struct file_lock_list_struct *fll = per_cpu_ptr(&file_lock_list, i);
2842
2843                 spin_lock_init(&fll->lock);
2844                 INIT_HLIST_HEAD(&fll->hlist);
2845         }
2846
2847         lease_notifier_chain_init();
2848         return 0;
2849 }
2850 core_initcall(filelock_init);