ipsec: Fix xfrm_state_walk race
authorHerbert Xu <herbert@gondor.apana.org.au>
Tue, 23 Sep 2008 02:48:19 +0000 (19:48 -0700)
committerDavid S. Miller <davem@davemloft.net>
Tue, 23 Sep 2008 02:48:19 +0000 (19:48 -0700)
As discovered by Timo Teräs, the currently xfrm_state_walk scheme
is racy because if a second dump finishes before the first, we
may free xfrm states that the first dump would walk over later.

This patch fixes this by storing the dumps in a list in order
to calculate the correct completion counter which cures this
problem.

I've expanded netlink_cb in order to accomodate the extra state
related to this.  It shouldn't be a big deal since netlink_cb
is kmalloced for each dump and we're just increasing it by 4 or
8 bytes.

Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
include/linux/netlink.h
include/net/xfrm.h
net/xfrm/xfrm_state.c

index 9ff1b54..cbba776 100644 (file)
@@ -220,7 +220,7 @@ struct netlink_callback
        int             (*dump)(struct sk_buff * skb, struct netlink_callback *cb);
        int             (*done)(struct netlink_callback *cb);
        int             family;
-       long            args[6];
+       long            args[7];
 };
 
 struct netlink_notify
index 4bb9499..48630b2 100644 (file)
@@ -1246,6 +1246,8 @@ struct xfrm6_tunnel {
 };
 
 struct xfrm_state_walk {
+       struct list_head list;
+       unsigned long genid;
        struct xfrm_state *state;
        int count;
        u8 proto;
@@ -1281,13 +1283,7 @@ static inline void xfrm6_fini(void)
 extern int xfrm_proc_init(void);
 #endif
 
-static inline void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
-{
-       walk->proto = proto;
-       walk->state = NULL;
-       walk->count = 0;
-}
-
+extern void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto);
 extern int xfrm_state_walk(struct xfrm_state_walk *walk,
                           int (*func)(struct xfrm_state *, int, void*), void *);
 extern void xfrm_state_walk_done(struct xfrm_state_walk *walk);
index abbe270..053970e 100644 (file)
@@ -64,6 +64,9 @@ static unsigned long xfrm_state_walk_ongoing;
 /* Counter indicating walk completion, protected by xfrm_cfg_mutex. */
 static unsigned long xfrm_state_walk_completed;
 
+/* List of outstanding state walks used to set the completed counter.  */
+static LIST_HEAD(xfrm_state_walks);
+
 static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned int family);
 static void xfrm_state_put_afinfo(struct xfrm_state_afinfo *afinfo);
 
@@ -1584,7 +1587,6 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
                        if (err) {
                                xfrm_state_hold(last);
                                walk->state = last;
-                               xfrm_state_walk_ongoing++;
                                goto out;
                        }
                }
@@ -1599,25 +1601,44 @@ int xfrm_state_walk(struct xfrm_state_walk *walk,
                err = func(last, 0, data);
 out:
        spin_unlock_bh(&xfrm_state_lock);
-       if (old != NULL) {
+       if (old != NULL)
                xfrm_state_put(old);
-               xfrm_state_walk_completed++;
-               if (!list_empty(&xfrm_state_gc_leftovers))
-                       schedule_work(&xfrm_state_gc_work);
-       }
        return err;
 }
 EXPORT_SYMBOL(xfrm_state_walk);
 
+void xfrm_state_walk_init(struct xfrm_state_walk *walk, u8 proto)
+{
+       walk->proto = proto;
+       walk->state = NULL;
+       walk->count = 0;
+       list_add_tail(&walk->list, &xfrm_state_walks);
+       walk->genid = ++xfrm_state_walk_ongoing;
+}
+EXPORT_SYMBOL(xfrm_state_walk_init);
+
 void xfrm_state_walk_done(struct xfrm_state_walk *walk)
 {
+       struct list_head *prev;
+
        if (walk->state != NULL) {
                xfrm_state_put(walk->state);
                walk->state = NULL;
-               xfrm_state_walk_completed++;
-               if (!list_empty(&xfrm_state_gc_leftovers))
-                       schedule_work(&xfrm_state_gc_work);
        }
+
+       prev = walk->list.prev;
+       list_del(&walk->list);
+
+       if (prev != &xfrm_state_walks) {
+               list_entry(prev, struct xfrm_state_walk, list)->genid =
+                       walk->genid;
+               return;
+       }
+
+       xfrm_state_walk_completed = walk->genid;
+
+       if (!list_empty(&xfrm_state_gc_leftovers))
+               schedule_work(&xfrm_state_gc_work);
 }
 EXPORT_SYMBOL(xfrm_state_walk_done);