mptcp: keep track of local endpoint still available for each msk
authorPaolo Abeni <pabeni@redhat.com>
Fri, 7 Jan 2022 00:20:22 +0000 (16:20 -0800)
committerDavid S. Miller <davem@davemloft.net>
Fri, 7 Jan 2022 11:27:07 +0000 (11:27 +0000)
Include into the path manager status a bitmap tracking the list
of local endpoints still available - not yet used - for the
relevant mptcp socket.

Keep such map updated at endpoint creation/deletion time, so
that we can easily skip already used endpoint at local address
selection time.

The endpoint used by the initial subflow is lazyly accounted at
subflow creation time: the usage bitmap is be up2date before
endpoint selection and we avoid such unneeded task in some relevant
scenarios - e.g. busy servers accepting incoming subflows but
not creating any additional ones nor annuncing additional addresses.

Overall this allows for fair local endpoints usage in case of
subflow failure.

As a side effect, this patch also enforces that each endpoint
is used at most once for each mptcp connection.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
net/mptcp/pm.c
net/mptcp/pm_netlink.c
net/mptcp/protocol.c
net/mptcp/protocol.h
tools/testing/selftests/net/mptcp/mptcp_join.sh

index 6b2bfe8..e9ba955 100644 (file)
@@ -370,6 +370,7 @@ void mptcp_pm_data_reset(struct mptcp_sock *msk)
        WRITE_ONCE(msk->pm.accept_subflow, false);
        WRITE_ONCE(msk->pm.remote_deny_join_id0, false);
        msk->pm.status = 0;
+       bitmap_fill(msk->pm.id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
 
        mptcp_pm_nl_data_init(msk);
 }
index 27427ae..ad3dc9c 100644 (file)
@@ -38,10 +38,6 @@ struct mptcp_pm_add_entry {
        u8                      retrans_times;
 };
 
-/* max value of mptcp_addr_info.id */
-#define MAX_ADDR_ID            U8_MAX
-#define BITMAP_SZ DIV_ROUND_UP(MAX_ADDR_ID + 1, BITS_PER_LONG)
-
 struct pm_nl_pernet {
        /* protects pernet updates */
        spinlock_t              lock;
@@ -53,14 +49,14 @@ struct pm_nl_pernet {
        unsigned int            local_addr_max;
        unsigned int            subflows_max;
        unsigned int            next_id;
-       unsigned long           id_bitmap[BITMAP_SZ];
+       DECLARE_BITMAP(id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
 };
 
 #define MPTCP_PM_ADDR_MAX      8
 #define ADD_ADDR_RETRANS_MAX   3
 
 static bool addresses_equal(const struct mptcp_addr_info *a,
-                           struct mptcp_addr_info *b, bool use_port)
+                           const struct mptcp_addr_info *b, bool use_port)
 {
        bool addr_equals = false;
 
@@ -174,6 +170,9 @@ select_local_address(const struct pm_nl_pernet *pernet,
                if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SUBFLOW))
                        continue;
 
+               if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+                       continue;
+
                if (entry->addr.family != sk->sk_family) {
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
                        if ((entry->addr.family == AF_INET &&
@@ -184,23 +183,17 @@ select_local_address(const struct pm_nl_pernet *pernet,
                                continue;
                }
 
-               /* avoid any address already in use by subflows and
-                * pending join
-                */
-               if (!lookup_subflow_by_saddr(&msk->conn_list, &entry->addr)) {
-                       ret = entry;
-                       break;
-               }
+               ret = entry;
+               break;
        }
        rcu_read_unlock();
        return ret;
 }
 
 static struct mptcp_pm_addr_entry *
-select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
+select_signal_address(struct pm_nl_pernet *pernet, struct mptcp_sock *msk)
 {
        struct mptcp_pm_addr_entry *entry, *ret = NULL;
-       int i = 0;
 
        rcu_read_lock();
        /* do not keep any additional per socket state, just signal
@@ -209,12 +202,14 @@ select_signal_address(struct pm_nl_pernet *pernet, unsigned int pos)
         * can lead to additional addresses not being announced.
         */
        list_for_each_entry_rcu(entry, &pernet->local_addr_list, list) {
+               if (!test_bit(entry->addr.id, msk->pm.id_avail_bitmap))
+                       continue;
+
                if (!(entry->flags & MPTCP_PM_ADDR_FLAG_SIGNAL))
                        continue;
-               if (i++ == pos) {
-                       ret = entry;
-                       break;
-               }
+
+               ret = entry;
+               break;
        }
        rcu_read_unlock();
        return ret;
@@ -258,9 +253,11 @@ EXPORT_SYMBOL_GPL(mptcp_pm_get_local_addr_max);
 
 static void check_work_pending(struct mptcp_sock *msk)
 {
-       if (msk->pm.add_addr_signaled == mptcp_pm_get_add_addr_signal_max(msk) &&
-           (msk->pm.local_addr_used == mptcp_pm_get_local_addr_max(msk) ||
-            msk->pm.subflows == mptcp_pm_get_subflows_max(msk)))
+       struct pm_nl_pernet *pernet = net_generic(sock_net((struct sock *)msk), pm_nl_pernet_id);
+
+       if (msk->pm.subflows == mptcp_pm_get_subflows_max(msk) ||
+           (find_next_and_bit(pernet->id_bitmap, msk->pm.id_avail_bitmap,
+                              MPTCP_PM_MAX_ADDR_ID + 1, 0) == MPTCP_PM_MAX_ADDR_ID + 1))
                WRITE_ONCE(msk->pm.work_pending, false);
 }
 
@@ -460,6 +457,35 @@ static unsigned int fill_remote_addresses_vec(struct mptcp_sock *msk, bool fullm
        return i;
 }
 
+static struct mptcp_pm_addr_entry *
+__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
+{
+       struct mptcp_pm_addr_entry *entry;
+
+       list_for_each_entry(entry, &pernet->local_addr_list, list) {
+               if (entry->addr.id == id)
+                       return entry;
+       }
+       return NULL;
+}
+
+static int
+lookup_id_by_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *addr)
+{
+       struct mptcp_pm_addr_entry *entry;
+       int ret = -1;
+
+       rcu_read_lock();
+       list_for_each_entry(entry, &pernet->local_addr_list, list) {
+               if (addresses_equal(&entry->addr, addr, entry->addr.port)) {
+                       ret = entry->addr.id;
+                       break;
+               }
+       }
+       rcu_read_unlock();
+       return ret;
+}
+
 static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 {
        struct sock *sk = (struct sock *)msk;
@@ -475,6 +501,19 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
        local_addr_max = mptcp_pm_get_local_addr_max(msk);
        subflows_max = mptcp_pm_get_subflows_max(msk);
 
+       /* do lazy endpoint usage accounting for the MPC subflows */
+       if (unlikely(!(msk->pm.status & BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED))) && msk->first) {
+               struct mptcp_addr_info local;
+               int mpc_id;
+
+               local_address((struct sock_common *)msk->first, &local);
+               mpc_id = lookup_id_by_addr(pernet, &local);
+               if (mpc_id < 0)
+                       __clear_bit(mpc_id, msk->pm.id_avail_bitmap);
+
+               msk->pm.status |= BIT(MPTCP_PM_MPC_ENDPOINT_ACCOUNTED);
+       }
+
        pr_debug("local %d:%d signal %d:%d subflows %d:%d\n",
                 msk->pm.local_addr_used, local_addr_max,
                 msk->pm.add_addr_signaled, add_addr_signal_max,
@@ -482,21 +521,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
 
        /* check first for announce */
        if (msk->pm.add_addr_signaled < add_addr_signal_max) {
-               local = select_signal_address(pernet,
-                                             msk->pm.add_addr_signaled);
+               local = select_signal_address(pernet, msk);
 
                if (local) {
                        if (mptcp_pm_alloc_anno_list(msk, local)) {
+                               __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
                                msk->pm.add_addr_signaled++;
                                mptcp_pm_announce_addr(msk, &local->addr, false);
                                mptcp_pm_nl_addr_send_ack(msk);
                        }
-               } else {
-                       /* pick failed, avoid fourther attempts later */
-                       msk->pm.local_addr_used = add_addr_signal_max;
                }
-
-               check_work_pending(msk);
        }
 
        /* check if should create a new subflow */
@@ -510,19 +544,16 @@ static void mptcp_pm_create_subflow_or_signal_addr(struct mptcp_sock *msk)
                        int i, nr;
 
                        msk->pm.local_addr_used++;
-                       check_work_pending(msk);
                        nr = fill_remote_addresses_vec(msk, fullmesh, addrs);
+                       if (nr)
+                               __clear_bit(local->addr.id, msk->pm.id_avail_bitmap);
                        spin_unlock_bh(&msk->pm.lock);
                        for (i = 0; i < nr; i++)
                                __mptcp_subflow_connect(sk, &local->addr, &addrs[i]);
                        spin_lock_bh(&msk->pm.lock);
-                       return;
                }
-
-               /* lookup failed, avoid fourther attempts later */
-               msk->pm.local_addr_used = local_addr_max;
-               check_work_pending(msk);
        }
+       check_work_pending(msk);
 }
 
 static void mptcp_pm_nl_fully_established(struct mptcp_sock *msk)
@@ -736,6 +767,7 @@ static void mptcp_pm_nl_rm_addr_or_subflow(struct mptcp_sock *msk,
                        msk->pm.subflows--;
                        __MPTCP_INC_STATS(sock_net(sk), rm_type);
                }
+               __set_bit(rm_list->ids[1], msk->pm.id_avail_bitmap);
                if (!removed)
                        continue;
 
@@ -765,6 +797,9 @@ void mptcp_pm_nl_work(struct mptcp_sock *msk)
 
        msk_owned_by_me(msk);
 
+       if (!(pm->status & MPTCP_PM_WORK_MASK))
+               return;
+
        spin_lock_bh(&msk->pm.lock);
 
        pr_debug("msk=%p status=%x", msk, pm->status);
@@ -810,7 +845,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
        /* to keep the code simple, don't do IDR-like allocation for address ID,
         * just bail when we exceed limits
         */
-       if (pernet->next_id == MAX_ADDR_ID)
+       if (pernet->next_id == MPTCP_PM_MAX_ADDR_ID)
                pernet->next_id = 1;
        if (pernet->addrs >= MPTCP_PM_ADDR_MAX)
                goto out;
@@ -830,7 +865,7 @@ static int mptcp_pm_nl_append_new_local_addr(struct pm_nl_pernet *pernet,
        if (!entry->addr.id) {
 find_next:
                entry->addr.id = find_next_zero_bit(pernet->id_bitmap,
-                                                   MAX_ADDR_ID + 1,
+                                                   MPTCP_PM_MAX_ADDR_ID + 1,
                                                    pernet->next_id);
                if (!entry->addr.id && pernet->next_id != 1) {
                        pernet->next_id = 1;
@@ -1197,18 +1232,6 @@ static int mptcp_nl_cmd_add_addr(struct sk_buff *skb, struct genl_info *info)
        return 0;
 }
 
-static struct mptcp_pm_addr_entry *
-__lookup_addr_by_id(struct pm_nl_pernet *pernet, unsigned int id)
-{
-       struct mptcp_pm_addr_entry *entry;
-
-       list_for_each_entry(entry, &pernet->local_addr_list, list) {
-               if (entry->addr.id == id)
-                       return entry;
-       }
-       return NULL;
-}
-
 int mptcp_pm_get_flags_and_ifindex_by_id(struct net *net, unsigned int id,
                                         u8 *flags, int *ifindex)
 {
@@ -1467,7 +1490,7 @@ static int mptcp_nl_cmd_flush_addrs(struct sk_buff *skb, struct genl_info *info)
        list_splice_init(&pernet->local_addr_list, &free_list);
        __reset_counters(pernet);
        pernet->next_id = 1;
-       bitmap_zero(pernet->id_bitmap, MAX_ADDR_ID + 1);
+       bitmap_zero(pernet->id_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
        spin_unlock_bh(&pernet->lock);
        mptcp_nl_remove_addrs_list(sock_net(skb->sk), &free_list);
        synchronize_rcu();
@@ -1577,7 +1600,7 @@ static int mptcp_nl_cmd_dump_addrs(struct sk_buff *msg,
        pernet = net_generic(net, pm_nl_pernet_id);
 
        spin_lock_bh(&pernet->lock);
-       for (i = id; i < MAX_ADDR_ID + 1; i++) {
+       for (i = id; i < MPTCP_PM_MAX_ADDR_ID + 1; i++) {
                if (test_bit(i, pernet->id_bitmap)) {
                        entry = __lookup_addr_by_id(pernet, i);
                        if (!entry)
index 667e153..5c956a8 100644 (file)
@@ -2488,8 +2488,7 @@ static void mptcp_worker(struct work_struct *work)
 
        mptcp_check_fastclose(msk);
 
-       if (msk->pm.status)
-               mptcp_pm_nl_work(msk);
+       mptcp_pm_nl_work(msk);
 
        if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
                mptcp_check_for_eof(msk);
index 386d1a9..2a6f096 100644 (file)
@@ -173,16 +173,25 @@ enum mptcp_pm_status {
        MPTCP_PM_ADD_ADDR_SEND_ACK,
        MPTCP_PM_RM_ADDR_RECEIVED,
        MPTCP_PM_ESTABLISHED,
-       MPTCP_PM_ALREADY_ESTABLISHED,   /* persistent status, set after ESTABLISHED event */
        MPTCP_PM_SUBFLOW_ESTABLISHED,
+       MPTCP_PM_ALREADY_ESTABLISHED,   /* persistent status, set after ESTABLISHED event */
+       MPTCP_PM_MPC_ENDPOINT_ACCOUNTED /* persistent status, set after MPC local address is
+                                        * accounted int id_avail_bitmap
+                                        */
 };
 
+/* Status bits below MPTCP_PM_ALREADY_ESTABLISHED need pm worker actions */
+#define MPTCP_PM_WORK_MASK ((1 << MPTCP_PM_ALREADY_ESTABLISHED) - 1)
+
 enum mptcp_addr_signal_status {
        MPTCP_ADD_ADDR_SIGNAL,
        MPTCP_ADD_ADDR_ECHO,
        MPTCP_RM_ADDR_SIGNAL,
 };
 
+/* max value of mptcp_addr_info.id */
+#define MPTCP_PM_MAX_ADDR_ID           U8_MAX
+
 struct mptcp_pm_data {
        struct mptcp_addr_info local;
        struct mptcp_addr_info remote;
@@ -201,6 +210,7 @@ struct mptcp_pm_data {
        u8              local_addr_used;
        u8              subflows;
        u8              status;
+       DECLARE_BITMAP(id_avail_bitmap, MPTCP_PM_MAX_ADDR_ID + 1);
        struct mptcp_rm_list rm_list_tx;
        struct mptcp_rm_list rm_list_rx;
 };
index 7ef639a..bbafa4c 100755 (executable)
@@ -1071,7 +1071,10 @@ signal_address_tests()
        ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags signal
        ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags signal
        run_tests $ns1 $ns2 10.0.1.1
-       chk_add_nr 4 4
+
+       # the server will not signal the address terminating
+       # the MPC subflow
+       chk_add_nr 3 3
 }
 
 link_failure_tests()