Revert "net: sctp: Fix a_rwnd/rwnd management to reflect real state of the receiver...

author Daniel Borkmann <dborkman@redhat.com>

Mon, 14 Apr 2014 19:45:17 +0000 (21:45 +0200)

committer David S. Miller <davem@davemloft.net>

Mon, 14 Apr 2014 20:26:48 +0000 (16:26 -0400)
author Daniel Borkmann <dborkman@redhat.com>
Mon, 14 Apr 2014 19:45:17 +0000 (21:45 +0200)
committer David S. Miller <davem@davemloft.net>
Mon, 14 Apr 2014 20:26:48 +0000 (16:26 -0400)
diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h

index 6ee76c804893fc9f291a469550b2356fe1b93c39..d992ca3145fec9826c1df1cf0affc95272a6af25 100644 (file)
--- a/include/net/sctp/structs.h
+++ b/include/net/sctp/structs.h
@@ -1653,6 +1653,17 @@ struct sctp_association {
         /* This is the last advertised value of rwnd over a SACK chunk. */
         __u32 a_rwnd;
  
+       /* Number of bytes by which the rwnd has slopped.  The rwnd is allowed
+        * to slop over a maximum of the association's frag_point.
+        */
+       __u32 rwnd_over;
+
+       /* Keeps treack of rwnd pressure.  This happens when we have
+        * a window, but not recevie buffer (i.e small packets).  This one
+        * is releases slowly (1 PMTU at a time ).
+        */
+       __u32 rwnd_press;
+
         /* This is the sndbuf size in use for the association.
          * This corresponds to the sndbuf size for the association,
          * as specified in the sk->sndbuf.
@@ -1881,7 +1892,8 @@ void sctp_assoc_update(struct sctp_association *old,
  __u32 sctp_association_get_next_tsn(struct sctp_association *);
  
  void sctp_assoc_sync_pmtu(struct sock *, struct sctp_association *);
-void sctp_assoc_rwnd_update(struct sctp_association *, bool);
+void sctp_assoc_rwnd_increase(struct sctp_association *, unsigned int);
+void sctp_assoc_rwnd_decrease(struct sctp_association *, unsigned int);
  void sctp_assoc_set_primary(struct sctp_association *,
                             struct sctp_transport *);
  void sctp_assoc_del_nonprimary_peers(struct sctp_association *,
diff --git a/net/sctp/associola.c b/net/sctp/associola.c

index 4f6d6f9d127474b457cf274a1a0977bb75c6e8dc..39579c3e0d14c12f165e420be064d7fbf248c0ad 100644 (file)
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -1395,35 +1395,44 @@ static inline bool sctp_peer_needs_update(struct sctp_association *asoc)
         return false;
  }
  
-/* Update asoc's rwnd for the approximated state in the buffer,
- * and check whether SACK needs to be sent.
- */
-void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
+/* Increase asoc's rwnd by len and send any window update SACK if needed. */
+void sctp_assoc_rwnd_increase(struct sctp_association *asoc, unsigned int len)
  {
-       int rx_count;
         struct sctp_chunk *sack;
         struct timer_list *timer;
  
-       if (asoc->ep->rcvbuf_policy)
-               rx_count = atomic_read(&asoc->rmem_alloc);
-       else
-               rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+       if (asoc->rwnd_over) {
+               if (asoc->rwnd_over >= len) {
+                       asoc->rwnd_over -= len;
+               } else {
+                       asoc->rwnd += (len - asoc->rwnd_over);
+                       asoc->rwnd_over = 0;
+               }
+       } else {
+               asoc->rwnd += len;
+       }
  
-       if ((asoc->base.sk->sk_rcvbuf - rx_count) > 0)
-               asoc->rwnd = (asoc->base.sk->sk_rcvbuf - rx_count) >> 1;
-       else
-               asoc->rwnd = 0;
+       /* If we had window pressure, start recovering it
+        * once our rwnd had reached the accumulated pressure
+        * threshold.  The idea is to recover slowly, but up
+        * to the initial advertised window.
+        */
+       if (asoc->rwnd_press && asoc->rwnd >= asoc->rwnd_press) {
+               int change = min(asoc->pathmtu, asoc->rwnd_press);
+               asoc->rwnd += change;
+               asoc->rwnd_press -= change;
+       }
  
-       pr_debug("%s: asoc:%p rwnd=%u, rx_count=%d, sk_rcvbuf=%d\n",
-                __func__, asoc, asoc->rwnd, rx_count,
-                asoc->base.sk->sk_rcvbuf);
+       pr_debug("%s: asoc:%p rwnd increased by %d to (%u, %u) - %u\n",
+                __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+                asoc->a_rwnd);
  
         /* Send a window update SACK if the rwnd has increased by at least the
          * minimum of the association's PMTU and half of the receive buffer.
          * The algorithm used is similar to the one described in
          * Section 4.2.3.3 of RFC 1122.
          */
-       if (update_peer && sctp_peer_needs_update(asoc)) {
+       if (sctp_peer_needs_update(asoc)) {
                 asoc->a_rwnd = asoc->rwnd;
  
                 pr_debug("%s: sending window update SACK- asoc:%p rwnd:%u "
@@ -1445,6 +1454,45 @@ void sctp_assoc_rwnd_update(struct sctp_association *asoc, bool update_peer)
         }
  }
  
+/* Decrease asoc's rwnd by len. */
+void sctp_assoc_rwnd_decrease(struct sctp_association *asoc, unsigned int len)
+{
+       int rx_count;
+       int over = 0;
+
+       if (unlikely(!asoc->rwnd || asoc->rwnd_over))
+               pr_debug("%s: association:%p has asoc->rwnd:%u, "
+                        "asoc->rwnd_over:%u!\n", __func__, asoc,
+                        asoc->rwnd, asoc->rwnd_over);
+
+       if (asoc->ep->rcvbuf_policy)
+               rx_count = atomic_read(&asoc->rmem_alloc);
+       else
+               rx_count = atomic_read(&asoc->base.sk->sk_rmem_alloc);
+
+       /* If we've reached or overflowed our receive buffer, announce
+        * a 0 rwnd if rwnd would still be positive.  Store the
+        * the potential pressure overflow so that the window can be restored
+        * back to original value.
+        */
+       if (rx_count >= asoc->base.sk->sk_rcvbuf)
+               over = 1;
+
+       if (asoc->rwnd >= len) {
+               asoc->rwnd -= len;
+               if (over) {
+                       asoc->rwnd_press += asoc->rwnd;
+                       asoc->rwnd = 0;
+               }
+       } else {
+               asoc->rwnd_over = len - asoc->rwnd;
+               asoc->rwnd = 0;
+       }
+
+       pr_debug("%s: asoc:%p rwnd decreased by %d to (%u, %u, %u)\n",
+                __func__, asoc, len, asoc->rwnd, asoc->rwnd_over,
+                asoc->rwnd_press);
+}
  
  /* Build the bind address list for the association based on info from the
   * local endpoint and the remote peer.
diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c

index 01e002430c858c293cdda11bd2962c3340043fb9..ae9fbeba40b03ca22bbfdd4f34106905e0c01d1e 100644 (file)
--- a/net/sctp/sm_statefuns.c
+++ b/net/sctp/sm_statefuns.c
@@ -6178,7 +6178,7 @@ static int sctp_eat_data(const struct sctp_association *asoc,
          * PMTU.  In cases, such as loopback, this might be a rather
          * large spill over.
          */
-       if ((!chunk->data_accepted) && (!asoc->rwnd ||
+       if ((!chunk->data_accepted) && (!asoc->rwnd || asoc->rwnd_over ||
             (datalen > asoc->rwnd + asoc->frag_point))) {
  
                 /* If this is the next TSN, consider reneging to make
diff --git a/net/sctp/socket.c b/net/sctp/socket.c

index e13519e9df80679a618f4e058dce88443b8add6c..ff20e2dbbbc7ef74d174aea2db05b83c3c91eab3 100644 (file)
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -2115,6 +2115,12 @@ static int sctp_recvmsg(struct kiocb *iocb, struct sock *sk,
                 sctp_skb_pull(skb, copied);
                 skb_queue_head(&sk->sk_receive_queue, skb);
  
+               /* When only partial message is copied to the user, increase
+                * rwnd by that amount. If all the data in the skb is read,
+                * rwnd is updated when the event is freed.
+                */
+               if (!sctp_ulpevent_is_notification(event))
+                       sctp_assoc_rwnd_increase(event->asoc, copied);
                 goto out;
         } else if ((event->msg_flags & MSG_NOTIFICATION) ||
                    (event->msg_flags & MSG_EOR))
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c

index 8d198ae0360634d25d3e4cff8a56cf73e389bd2d..85c64658bd0b183df5c7a7fd8394df757cb0b4b0 100644 (file)
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -989,7 +989,7 @@ static void sctp_ulpevent_receive_data(struct sctp_ulpevent *event,
         skb = sctp_event2skb(event);
         /* Set the owner and charge rwnd for bytes received.  */
         sctp_ulpevent_set_owner(event, asoc);
-       sctp_assoc_rwnd_update(asoc, false);
+       sctp_assoc_rwnd_decrease(asoc, skb_headlen(skb));
  
         if (!skb->data_len)
                 return;
@@ -1011,7 +1011,6 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
  {
         struct sk_buff *skb, *frag;
         unsigned int    len;
-       struct sctp_association *asoc;
  
         /* Current stack structures assume that the rcv buffer is
          * per socket.   For UDP style sockets this is not true as
@@ -1036,11 +1035,8 @@ static void sctp_ulpevent_release_data(struct sctp_ulpevent *event)
         }
  
  done:
-       asoc = event->asoc;
-       sctp_association_hold(asoc);
+       sctp_assoc_rwnd_increase(event->asoc, len);
         sctp_ulpevent_release_owner(event);
-       sctp_assoc_rwnd_update(asoc, true);
-       sctp_association_put(asoc);
  }
  
  static void sctp_ulpevent_release_frag_data(struct sctp_ulpevent *event)
author	Daniel Borkmann <dborkman@redhat.com>
	Mon, 14 Apr 2014 19:45:17 +0000 (21:45 +0200)
committer	David S. Miller <davem@davemloft.net>
	Mon, 14 Apr 2014 20:26:48 +0000 (16:26 -0400)
include/net/sctp/structs.h		patch \| blob \| history
net/sctp/associola.c		patch \| blob \| history
net/sctp/sm_statefuns.c		patch \| blob \| history
net/sctp/socket.c		patch \| blob \| history
net/sctp/ulpevent.c		patch \| blob \| history