futex: Prevent attaching to kernel threads
diff --git a/kernel/futex.c b/kernel/futex.c
index 44a1261..8700881 100644
  * enqueue.
  */
 
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
 int __read_mostly futex_cmpxchg_enabled;
+#endif
 
 /*
  * Futex flags used to encode options to functions and preserve them across
@@ -234,6 +236,7 @@ static const struct futex_q futex_q_init = {
  * waiting on a futex.
  */
 struct futex_hash_bucket {
+       atomic_t waiters;
        spinlock_t lock;
        struct plist_head chain;
 } ____cacheline_aligned_in_smp;
@@ -253,22 +256,37 @@ static inline void futex_get_mm(union futex_key *key)
        smp_mb__after_atomic_inc();
 }
 
-static inline bool hb_waiters_pending(struct futex_hash_bucket *hb)
+/*
+ * Reflects a new waiter being added to the waitqueue.
+ */
+static inline void hb_waiters_inc(struct futex_hash_bucket *hb)
 {
 #ifdef CONFIG_SMP
+       atomic_inc(&hb->waiters);
        /*
-        * Tasks trying to enter the critical region are most likely
-        * potential waiters that will be added to the plist. Ensure
-        * that wakers won't miss to-be-slept tasks in the window between
-        * the wait call and the actual plist_add.
+        * Full barrier (A), see the ordering comment above.
         */
-       if (spin_is_locked(&hb->lock))
-               return true;
-       smp_rmb(); /* Make sure we check the lock state first */
+       smp_mb__after_atomic_inc();
+#endif
+}
 
-       return !plist_head_empty(&hb->chain);
+/*
+ * Reflects a waiter being removed from the waitqueue by wakeup
+ * paths.
+ */
+static inline void hb_waiters_dec(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+       atomic_dec(&hb->waiters);
+#endif
+}
+
+static inline int hb_waiters_pending(struct futex_hash_bucket *hb)
+{
+#ifdef CONFIG_SMP
+       return atomic_read(&hb->waiters);
 #else
-       return true;
+       return 1;
 #endif
 }
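
The payoff of the counter is on the wake side: a waker can now skip an
empty bucket without taking the bucket lock at all. A minimal sketch of
that fast path, modeled on futex_wake() (the call site is outside this
diff, so treat the surrounding details as assumptions):

	struct futex_hash_bucket *hb;
	struct futex_q *this, *next;
	int ret = 0;

	hb = hash_futex(&key);

	/*
	 * hb_waiters_inc() is fully ordered before the waiter blocks
	 * on hb->lock (barrier A above), so reading zero here
	 * guarantees there is nobody to wake and the spinlock can be
	 * skipped entirely.
	 */
	if (!hb_waiters_pending(hb))
		goto out_put_key;

	spin_lock(&hb->lock);
	plist_for_each_entry_safe(this, next, &hb->chain, list) {
		if (match_futex(&this->key, &key)) {
			wake_futex(this);
			if (++ret >= nr_wake)
				break;
		}
	}
	spin_unlock(&hb->lock);
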
 
@@ -713,7 +731,8 @@ void exit_pi_state_list(struct task_struct *curr)
 
 static int
 lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
-               union futex_key *key, struct futex_pi_state **ps)
+               union futex_key *key, struct futex_pi_state **ps,
+               struct task_struct *task)
 {
        struct futex_pi_state *pi_state = NULL;
        struct futex_q *this, *next;
@@ -754,6 +773,16 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
                                        return -EINVAL;
                        }
 
+                       /*
+                        * Protect against a corrupted uval. If uval
+                        * is 0x80000000 then pid is 0 and the waiter
+                        * bit is set. So the deadlock check in the
+                        * calling code has failed and we did not fall
+                        * into the check above due to !pid.
+                        */
+                       if (task && pi_state->owner == task)
+                               return -EDEADLK;
+
                        atomic_inc(&pi_state->refcount);
                        *ps = pi_state;
 
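
To see what the "corrupted uval" comment guards against, plug the quoted
value into the standard uapi masks from <linux/futex.h>:

	u32 uval = 0x80000000;

	/* uval & FUTEX_WAITERS  == 0x80000000 -> waiter bit set */
	/* uval & FUTEX_TID_MASK == 0x00000000 -> owner TID is 0 */

With the TID field zero, the caller's TID-based deadlock check cannot
fire and the !pid case above is skipped, so comparing pi_state->owner
against the incoming task is the last chance to catch a task attaching
to its own pi_state.
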
@@ -771,6 +800,11 @@ lookup_pi_state(u32 uval, struct futex_hash_bucket *hb,
        if (!p)
                return -ESRCH;
 
+       if (!p->mm) {
+               put_task_struct(p);
+               return -EPERM;
+       }
+
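
From user space the new check surfaces as -EPERM when the TID field of a
PI futex names a kernel thread (kernel threads run without an mm). A
hypothetical demonstration; the choice of PID 2 (kthreadd on typical
systems) is an assumption:

	#define _GNU_SOURCE
	#include <errno.h>
	#include <linux/futex.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		/* Forge a futex word whose TID field points at a
		 * kernel thread (assumed to be PID 2 here). */
		uint32_t futex_word = 2;

		long ret = syscall(SYS_futex, &futex_word, FUTEX_LOCK_PI,
				   0, NULL, NULL, 0);
		if (ret == -1 && errno == EPERM)
			printf("attach to kernel thread: EPERM\n");
		return 0;
	}

Before this check, user space could declare a random kernel thread to be
the owner of a PI futex and attach a pi_state to it.
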
        /*
         * We need to look at the task state flags to figure out,
         * whether the task is exiting. To protect against the do_exit
@@ -903,7 +937,7 @@ retry:
         * We don't have the lock. Look up the PI state (or create it if
         * we are the first waiter):
         */
-       ret = lookup_pi_state(uval, hb, key, ps);
+       ret = lookup_pi_state(uval, hb, key, ps, task);
 
        if (unlikely(ret)) {
                switch (ret) {
@@ -954,6 +988,7 @@ static void __unqueue_futex(struct futex_q *q)
 
        hb = container_of(q->lock_ptr, struct futex_hash_bucket, lock);
        plist_del(&q->list, &hb->chain);
+       hb_waiters_dec(hb);
 }
 
 /*
@@ -1257,7 +1292,9 @@ void requeue_futex(struct futex_q *q, struct futex_hash_bucket *hb1,
         */
        if (likely(&hb1->chain != &hb2->chain)) {
                plist_del(&q->list, &hb1->chain);
+               hb_waiters_dec(hb1);
                plist_add(&q->list, &hb2->chain);
+               hb_waiters_inc(hb2);
                q->lock_ptr = &hb2->lock;
        }
        get_futex_key_refs(key2);
@@ -1312,7 +1349,7 @@ void requeue_pi_wake_futex(struct futex_q *q, union futex_key *key,
  *
  * Return:
  *  0 - failed to acquire the lock atomically;
- *  1 - acquired the lock;
+ * >0 - acquired the lock, return value is vpid of the top_waiter
  * <0 - error
  */
 static int futex_proxy_trylock_atomic(u32 __user *pifutex,
@@ -1323,7 +1360,7 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
 {
        struct futex_q *top_waiter = NULL;
        u32 curval;
-       int ret;
+       int ret, vpid;
 
        if (get_futex_value_locked(&curval, pifutex))
                return -EFAULT;
@@ -1351,11 +1388,13 @@ static int futex_proxy_trylock_atomic(u32 __user *pifutex,
         * the contended case or if set_waiters is 1.  The pi_state is returned
         * in ps in contended cases.
         */
+       vpid = task_pid_vnr(top_waiter->task);
        ret = futex_lock_pi_atomic(pifutex, hb2, key2, ps, top_waiter->task,
                                   set_waiters);
-       if (ret == 1)
+       if (ret == 1) {
                requeue_pi_wake_futex(top_waiter, key2, hb2);
-
+               return vpid;
+       }
        return ret;
 }
 
@@ -1386,7 +1425,6 @@ static int futex_requeue(u32 __user *uaddr1, unsigned int flags,
        struct futex_pi_state *pi_state = NULL;
        struct futex_hash_bucket *hb1, *hb2;
        struct futex_q *this, *next;
-       u32 curval2;
 
        if (requeue_pi) {
                /*
@@ -1431,6 +1469,7 @@ retry:
        hb2 = hash_futex(&key2);
 
 retry_private:
+       hb_waiters_inc(hb2);
        double_lock_hb(hb1, hb2);
 
        if (likely(cmpval != NULL)) {
@@ -1440,6 +1479,7 @@ retry_private:
 
                if (unlikely(ret)) {
                        double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
 
                        ret = get_user(curval, uaddr1);
                        if (ret)
@@ -1472,16 +1512,25 @@ retry_private:
                 * At this point the top_waiter has either taken uaddr2 or is
                 * waiting on it.  If the former, then the pi_state will not
                 * exist yet, look it up one more time to ensure we have a
-                * reference to it.
+                * reference to it. If the lock was taken, ret contains the
+                * vpid of the top waiter task.
                 */
-               if (ret == 1) {
+               if (ret > 0) {
                        WARN_ON(pi_state);
                        drop_count++;
                        task_count++;
-                       ret = get_futex_value_locked(&curval2, uaddr2);
-                       if (!ret)
-                               ret = lookup_pi_state(curval2, hb2, &key2,
-                                                     &pi_state);
+                       /*
+                        * If we acquired the lock, then the user
+                        * space value of uaddr2 should be vpid. It
+                        * cannot be changed by the top waiter as it
+                        * is blocked on hb2 lock if it tries to do
+                        * so. If something fiddled with it behind our
+                        * back the pi state lookup might unearth
+                        * it. So we rather use the known value than
+                        * rereading and handing potential crap to
+                        * lookup_pi_state.
+                        */
+                       ret = lookup_pi_state(ret, hb2, &key2, &pi_state, NULL);
                }
 
                switch (ret) {
@@ -1489,6 +1538,7 @@ retry_private:
                        break;
                case -EFAULT:
                        double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
                        put_futex_key(&key2);
                        put_futex_key(&key1);
                        ret = fault_in_user_writeable(uaddr2);
@@ -1498,6 +1548,7 @@ retry_private:
                case -EAGAIN:
                        /* The owner was exiting, try again. */
                        double_unlock_hb(hb1, hb2);
+                       hb_waiters_dec(hb2);
                        put_futex_key(&key2);
                        put_futex_key(&key1);
                        cond_resched();
@@ -1573,6 +1624,7 @@ retry_private:
 
 out_unlock:
        double_unlock_hb(hb1, hb2);
+       hb_waiters_dec(hb2);
 
        /*
         * drop_futex_key_refs() must be called outside the spinlocks. During
@@ -1600,6 +1652,17 @@ static inline struct futex_hash_bucket *queue_lock(struct futex_q *q)
        struct futex_hash_bucket *hb;
 
        hb = hash_futex(&q->key);
+
+       /*
+        * Increment the counter before taking the lock so that
+        * a potential waker won't miss a to-be-slept task that is
+        * waiting for the spinlock. This is safe as all queue_lock()
+        * users end up calling queue_me(). Similarly, for housekeeping,
+        * decrement the counter at queue_unlock() when some error has
+        * occurred and we don't end up adding the task to the list.
+        */
+       hb_waiters_inc(hb);
+
        q->lock_ptr = &hb->lock;
 
        spin_lock(&hb->lock); /* implies MB (A) */
@@ -1611,6 +1674,7 @@ queue_unlock(struct futex_hash_bucket *hb)
        __releases(&hb->lock)
 {
        spin_unlock(&hb->lock);
+       hb_waiters_dec(hb);
 }
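
The inc/dec pairing follows the queue_lock()/queue_me()/queue_unlock()
protocol described in the comment above. A condensed sketch of the wait
side, modeled on futex_wait_setup() (simplified, so treat the error
handling as an assumption):

	hb = queue_lock(&q);		/* hb_waiters_inc() + spin_lock() */

	ret = get_futex_value_locked(&uval, uaddr);
	if (ret || uval != val) {
		/*
		 * Not queueing after all: queue_unlock() drops the
		 * speculative count along with the bucket lock.
		 */
		queue_unlock(hb);
		return ret ? ret : -EWOULDBLOCK;
	}

	/*
	 * Really going to sleep: the count is now backed by a plist
	 * entry and stays up until __unqueue_futex() or a requeue
	 * moves it to another bucket.
	 */
	queue_me(&q, hb);
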
 
 /**
@@ -2342,6 +2406,7 @@ int handle_early_requeue_pi_wakeup(struct futex_hash_bucket *hb,
                 * Unqueue the futex_q and determine which it was.
                 */
                plist_del(&q->list, &hb->chain);
+               hb_waiters_dec(hb);
 
                /* Handle spurious wakeups gracefully */
                ret = -EWOULDBLOCK;
@@ -2843,9 +2908,28 @@ SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
        return do_futex(uaddr, op, val, tp, uaddr2, val2, val3);
 }
 
-static int __init futex_init(void)
+static void __init futex_detect_cmpxchg(void)
 {
+#ifndef CONFIG_HAVE_FUTEX_CMPXCHG
        u32 curval;
+
+       /*
+        * This will fail and we want it. Some arch implementations do
+        * runtime detection of the futex_atomic_cmpxchg_inatomic()
+        * functionality. We want to know that before we call in any
+        * of the complex code paths. Also we want to prevent
+        * registration of robust lists in that case. NULL is
+        * guaranteed to fault and we get -EFAULT on functional
+        * implementation, the non-functional ones will return
+        * -ENOSYS.
+        */
+       if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
+               futex_cmpxchg_enabled = 1;
+#endif
+}
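
With the runtime test compiled out, futex_cmpxchg_enabled still has to
read as true on architectures that select CONFIG_HAVE_FUTEX_CMPXCHG.
Presumably that happens via a compile-time constant along these lines
(the definition lives in a header outside this diff, so this is an
assumption):

	#ifdef CONFIG_HAVE_FUTEX_CMPXCHG
	#define futex_cmpxchg_enabled 1
	#else
	extern int futex_cmpxchg_enabled;
	#endif
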
+
+static int __init futex_init(void)
+{
        unsigned int futex_shift;
        unsigned long i;
 
@@ -2861,20 +2945,11 @@ static int __init futex_init(void)
                                               &futex_shift, NULL,
                                               futex_hashsize, futex_hashsize);
        futex_hashsize = 1UL << futex_shift;
-       /*
-        * This will fail and we want it. Some arch implementations do
-        * runtime detection of the futex_atomic_cmpxchg_inatomic()
-        * functionality. We want to know that before we call in any
-        * of the complex code paths. Also we want to prevent
-        * registration of robust lists in that case. NULL is
-        * guaranteed to fault and we get -EFAULT on functional
-        * implementation, the non-functional ones will return
-        * -ENOSYS.
-        */
-       if (cmpxchg_futex_value_locked(&curval, NULL, 0, 0) == -EFAULT)
-               futex_cmpxchg_enabled = 1;
+
+       futex_detect_cmpxchg();
 
        for (i = 0; i < futex_hashsize; i++) {
+               atomic_set(&futex_queues[i].waiters, 0);
                plist_head_init(&futex_queues[i].chain);
                spin_lock_init(&futex_queues[i].lock);
        }