2 * This file is part of the Nice GLib ICE library.
4 * (C) 2010, 2014 Collabora Ltd.
5 * Contact: Philip Withnall
8 * The contents of this file are subject to the Mozilla Public License Version
9 * 1.1 (the "License"); you may not use this file except in compliance with
10 * the License. You may obtain a copy of the License at
11 * http://www.mozilla.org/MPL/
13 * Software distributed under the License is distributed on an "AS IS" basis,
14 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
15 * for the specific language governing rights and limitations under the
18 * The Original Code is the Nice GLib ICE library.
20 * The Initial Developers of the Original Code are Collabora Ltd and Nokia
21 * Corporation. All Rights Reserved.
24 * Youness Alaoui, Collabora Ltd.
25 * Philip Withnall, Collabora Ltd.
27 * Alternatively, the contents of this file may be used under the terms of the
28 * the GNU Lesser General Public License Version 2.1 (the "LGPL"), in which
29 * case the provisions of LGPL are applicable instead of those above. If you
30 * wish to allow use of your version of this file only under the terms of the
31 * LGPL and not to allow others to use your version of this file under the
32 * MPL, indicate your decision by deleting the provisions above and replace
33 * them with the notice and other provisions required by the LGPL. If you do
34 * not delete the provisions above, a recipient may use your version of this
35 * file under either the MPL or the LGPL.
38 /* Reproducing license from libjingle for copied code */
42 * Copyright 2004--2005, Google Inc.
44 * Redistribution and use in source and binary forms, with or without
45 * modification, are permitted provided that the following conditions are met:
47 * 1. Redistributions of source code must retain the above copyright notice,
48 * this list of conditions and the following disclaimer.
49 * 2. Redistributions in binary form must reproduce the above copyright notice,
50 * this list of conditions and the following disclaimer in the documentation
51 * and/or other materials provided with the distribution.
52 * 3. The name of the author may not be used to endorse or promote products
53 * derived from this software without specific prior written permission.
55 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
56 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
57 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
58 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
59 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
60 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
61 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
62 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
63 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
64 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
74 # include <arpa/inet.h>
77 #include "pseudotcp.h"
78 #include "agent-priv.h"
/* GObject class structure for PseudoTcpSocket; the visible member is the
 * parent GObjectClass. */
80 struct _PseudoTcpSocketClass {
81 GObjectClass parent_class;
/* Opaque private-data type; the instance keeps a pointer to it in `priv`. */
84 typedef struct _PseudoTcpSocketPrivate PseudoTcpSocketPrivate;
87 struct _PseudoTcpSocket {
89 PseudoTcpSocketPrivate *priv;
/* Registers the PseudoTcpSocket type as a subclass of G_TYPE_OBJECT and
 * declares the pseudo_tcp_socket_* class/instance init hooks. */
92 G_DEFINE_TYPE (PseudoTcpSocket, pseudo_tcp_socket, G_TYPE_OBJECT);
94 //////////////////////////////////////////////////////////////////////
96 //////////////////////////////////////////////////////////////////////
/* Candidate packet sizes, listed from largest to smallest and terminated by a
 * 0 marker. Entries with a leading `//` are disabled. */
99 const guint16 PACKET_MAXIMUMS[] = {
100 65535, // Theoretical maximum, Hyperchannel
102 17914, // 16Mb IBM Token Ring
104 //4464, // IEEE 802.5 (4Mb max)
106 //2048, // Wideband Network
107 2002, // IEEE 802.5 (4Mb recommended)
108 //1536, // Experimental Ethernet Networks
109 //1500, // Ethernet, Point-to-Point (default)
111 1006, // SLIP, ARPANET
112 //576, // X.25 Networks
113 //544, // DEC IP Portal
115 508, // IEEE 802/Source-Rt Bridge, ARCNET
116 296, // Point-to-Point (low delay)
117 //68, // Official minimum
118 0, // End of list marker
/* Packet size limits and the sizes of the lower-layer headers that are summed
 * into PACKET_OVERHEAD (presumably all in bytes — confirm). */
121 // FIXME: This is a reasonable MTU, but we should get it from the lower layer
/* Largest packet accepted by the notify_packet/notify_message entry points. */
123 #define MAX_PACKET 65532
124 // Note: we removed lowest level because packet overhead was larger!
/* Smallest packet size; the initial MSS is derived from it in
 * pseudo_tcp_socket_init(). */
125 #define MIN_PACKET 296
127 // (+ up to 40 bytes of options?)
128 #define IP_HEADER_SIZE 20
129 #define ICMP_HEADER_SIZE 8
130 #define UDP_HEADER_SIZE 8
131 // TODO: Make JINGLE_HEADER_SIZE transparent to this code?
132 // when relay framing is in use
133 #define JINGLE_HEADER_SIZE 64
135 //////////////////////////////////////////////////////////////////////
136 // Global Constants and Functions
137 //////////////////////////////////////////////////////////////////////
140 // 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
141 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
142 // 0 | Conversation Number |
143 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
144 // 4 | Sequence Number |
145 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
146 // 8 | Acknowledgment Number |
147 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
148 // | | |U|A|P|R|S|F| |
149 // 12 | Control | |R|C|S|S|Y|I| Window |
150 // | | |G|K|H|T|N|N| |
151 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
152 // 16 | Timestamp sending |
153 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
154 // 20 | Timestamp receiving |
155 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
157 // +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
159 //////////////////////////////////////////////////////////////////////
/* Protocol-level constants: sequence space, header size, retransmission
 * timer bounds (milliseconds) and default buffer sizes. */
161 #define MAX_SEQ 0xFFFFFFFF
/* Size of the pseudo-TCP header: six 32-bit words, per the layout above. */
162 #define HEADER_SIZE 24
/* Total per-packet overhead: pseudo-TCP header plus the UDP, IP and Jingle
 * header allowances. */
164 #define PACKET_OVERHEAD (HEADER_SIZE + UDP_HEADER_SIZE + \
165 IP_HEADER_SIZE + JINGLE_HEADER_SIZE)
167 // MIN_RTO = 1 second (RFC6298, Sec 2.4)
169 #define DEF_RTO 1000 /* 1 second (RFC 6298 sect 2.1) */
170 #define MAX_RTO 60000 /* 60 seconds */
171 #define DEFAULT_ACK_DELAY 100 /* 100 milliseconds */
/* Default for the "no-delay" property; FALSE leaves Nagle's algorithm on. */
172 #define DEFAULT_NO_DELAY FALSE
/* Default receive and send buffer sizes used by pseudo_tcp_socket_init(). */
174 #define DEFAULT_RCV_BUF_SIZE (60 * 1024)
175 #define DEFAULT_SND_BUF_SIZE (90 * 1024)
/* Option kinds carried in the connect (control) message; see
 * queue_connect_message() for how TCP_OPT_WND_SCALE and TCP_OPT_FIN_ACK are
 * emitted. */
177 /* NOTE: This must fit in 8 bits. This is used on the wire. */
179 /* Google-provided options: */
180 TCP_OPT_EOL = 0, /* end of list */
181 TCP_OPT_NOOP = 1, /* no-op */
182 TCP_OPT_MSS = 2, /* maximum segment size */
183 TCP_OPT_WND_SCALE = 3, /* window scale factor */
184 /* libnice extensions: */
185 TCP_OPT_FIN_ACK = 254, /* FIN-ACK support */
/* Segment flag bits; the values match the SYN and ACK bit positions of the
 * flags byte in the header layout above. */
190 #define FLAG_SYN 0x02
191 #define FLAG_ACK 0x10
194 /* NOTE: This must fit in 5 bits. This is used on the wire. */
/* Control message types; CTL_CONNECT is the first byte written by
 * queue_connect_message(). */
202 #define CTL_CONNECT 0
203 //#define CTL_REDIRECT 1
204 #define CTL_EXTRA 255
/* NOTE(review): no use of CTRL_BOUND is visible in this part of the file. */
207 #define CTRL_BOUND 0x80000000
/* Timer constants, all in milliseconds. */
209 /* Maximum segment lifetime (1 minute).
210 * RFC 793, §3.3 specifies 2 minutes; but Linux uses 1 minute, so let’s go with
212 #define TCP_MSL (60 * 1000)
214 // If there are no pending clocks, wake up every 4 seconds
215 #define DEFAULT_TIMEOUT 4000
216 // If the connection is closed, once per minute
217 #define CLOSED_TIMEOUT (60 * 1000)
218 /* Timeout after reaching the TIME_WAIT state, in milliseconds.
219 * See: RFC 1122, §4.2.2.13.
221 * XXX: Since we can control the underlying layer’s channel ID, we can guarantee
222 * delayed segments won’t affect subsequent connections, so can radically
223 * shorten the TIME-WAIT timeout (to the extent that it basically doesn’t
224 * exist). It would normally be (2 * TCP_MSL). */
225 #define TIME_WAIT_TIMEOUT 1
227 //////////////////////////////////////////////////////////////////////
229 //////////////////////////////////////////////////////////////////////
/* Function-like minimum/maximum helpers. Each argument may be evaluated
 * twice, so callers must not pass expressions with side effects. */
231 # define min(first, second) ((first) < (second) ? (first) : (second))
232 # define max(first, second) ((first) > (second) ? (first) : (second))
/* Clamp @middle to the range [@lower, @upper]: raise it to at least @lower,
 * then cap the result at @upper. */
236 bound(guint32 lower, guint32 middle, guint32 upper)
238 return min (max (lower, middle), upper);
/* Tell whether @middle lies in the interval running from @earlier to @later
 * (inclusive) on a wrapping 32-bit clock. */
242 time_is_between(guint32 later, guint32 middle, guint32 earlier)
/* No wraparound: plain inclusive range check. */
244 if (earlier <= later) {
245 return ((earlier <= middle) && (middle <= later));
/* Interval wraps past zero: @middle is inside unless it falls in the gap
 * strictly between @later and @earlier. */
247 return !((later < middle) && (middle < earlier));
/* Signed difference (@later - @earlier) between two timestamps on a wrapping
 * 32-bit clock. The result is non-negative when @later falls within half the
 * clock range after @earlier, and negative otherwise. */
252 time_diff(guint32 later, guint32 earlier)
254 guint32 LAST = 0xFFFFFFFF;
255 guint32 HALF = 0x80000000;
/* @later is at most HALF ticks ahead of @earlier: positive difference. */
256 if (time_is_between(earlier + HALF, later, earlier)) {
257 if (earlier <= later) {
258 return (long)(later - earlier);
/* @later has wrapped past zero relative to @earlier. */
260 return (long)(later + (LAST - earlier) + 1);
/* Otherwise @later is treated as being behind @earlier: negative difference. */
263 if (later <= earlier) {
264 return -(long) (earlier - later);
/* @earlier has wrapped past zero relative to @later. */
266 return -(long)(earlier + (LAST - later) + 1);
271 ////////////////////////////////////////////////////////
272 // PseudoTcpFifo works exactly like FifoBuffer in libjingle
273 ////////////////////////////////////////////////////////
/* Give FIFO @b a slice-allocated backing store of @size bytes and record that
 * size as its capacity. */
285 pseudo_tcp_fifo_init (PseudoTcpFifo *b, gsize size)
287 b->buffer = g_slice_alloc (size);
288 b->buffer_length = size;
/* Release the backing store of FIFO @b and reset its recorded capacity to 0. */
292 pseudo_tcp_fifo_clear (PseudoTcpFifo *b)
/* g_slice_free1() must be given the same size the block was allocated with. */
295 g_slice_free1 (b->buffer_length, b->buffer);
297 b->buffer_length = 0;
/* Return the number of bytes currently stored in FIFO @b. */
301 pseudo_tcp_fifo_get_buffered (PseudoTcpFifo *b)
303 return b->data_length;
307 pseudo_tcp_fifo_set_capacity (PseudoTcpFifo *b, gsize size)
309 if (b->data_length > size)
312 if (size != b->data_length) {
313 guint8 *buffer = g_slice_alloc (size);
314 gsize copy = b->data_length;
315 gsize tail_copy = min (copy, b->buffer_length - b->read_position);
317 memcpy (buffer, &b->buffer[b->read_position], tail_copy);
318 memcpy (buffer + tail_copy, &b->buffer[0], copy - tail_copy);
319 g_slice_free1 (b->buffer_length, b->buffer);
321 b->buffer_length = size;
322 b->read_position = 0;
/* Discard @size bytes from the front of FIFO @b without copying them out.
 * Asserts that @size does not exceed the buffered amount. */
329 pseudo_tcp_fifo_consume_read_data (PseudoTcpFifo *b, gsize size)
331 g_assert_cmpint (size, <=, b->data_length);
/* Advance the read position, wrapping at the end of the backing store. */
333 b->read_position = (b->read_position + size) % b->buffer_length;
334 b->data_length -= size;
/* Mark @size further bytes of FIFO @b as holding valid data. Asserts that
 * @size fits in the free space. */
338 pseudo_tcp_fifo_consume_write_buffer (PseudoTcpFifo *b, gsize size)
340 g_assert_cmpint (size, <=, b->buffer_length - b->data_length);
342 b->data_length += size;
/* Return the free space in FIFO @b: capacity minus buffered bytes. */
346 pseudo_tcp_fifo_get_write_remaining (PseudoTcpFifo *b)
348 return b->buffer_length - b->data_length;
/* Copy up to @bytes bytes from FIFO @b into @buffer, starting @offset bytes
 * past the current read position. Nothing visible here modifies the FIFO
 * state, so the data is peeked rather than consumed. */
352 pseudo_tcp_fifo_read_offset (PseudoTcpFifo *b, guint8 *buffer, gsize bytes,
/* These are computed before the offset is validated; `available` is unsigned
 * and wraps if @offset exceeds data_length, hence the guard below. */
355 gsize available = b->data_length - offset;
356 gsize read_position = (b->read_position + offset) % b->buffer_length;
357 gsize copy = min (bytes, available);
/* Part of the copy that lies before the physical end of the buffer. */
358 gsize tail_copy = min(copy, b->buffer_length - read_position);
/* NOTE(review): the statement guarded by this check is not visible here;
 * presumably it returns early when nothing is buffered at @offset. */
361 if (offset >= b->data_length)
/* Two-part copy: up to the end of the buffer, then the wrapped remainder. */
364 memcpy(buffer, &b->buffer[read_position], tail_copy);
365 memcpy(buffer + tail_copy, &b->buffer[0], copy - tail_copy);
/* Copy up to @bytes bytes from @buffer into FIFO @b, starting @offset bytes
 * past the end of the buffered data. data_length is not updated in the lines
 * visible here. */
371 pseudo_tcp_fifo_write_offset (PseudoTcpFifo *b, const guint8 *buffer,
372 gsize bytes, gsize offset)
/* Free space beyond @offset; unsigned, so it wraps if @offset is too large
 * (hence the guard below). */
374 gsize available = b->buffer_length - b->data_length - offset;
375 gsize write_position = (b->read_position + b->data_length + offset)
377 gsize copy = min (bytes, available);
/* Part of the copy that fits before the physical end of the buffer. */
378 gsize tail_copy = min(copy, b->buffer_length - write_position);
/* NOTE(review): the body of this guard is not visible here; presumably it
 * bails out when there is no room at @offset. */
380 if (b->data_length + offset >= b->buffer_length) {
/* Two-part copy: up to the end of the buffer, then the wrapped remainder. */
384 memcpy(&b->buffer[write_position], buffer, tail_copy);
385 memcpy(&b->buffer[0], buffer + tail_copy, copy - tail_copy);
/* Read and consume up to @bytes bytes from the front of FIFO @b into
 * @buffer. */
391 pseudo_tcp_fifo_read (PseudoTcpFifo *b, guint8 *buffer, gsize bytes)
395 copy = pseudo_tcp_fifo_read_offset (b, buffer, bytes, 0);
/* Consume what was copied: advance the read position (with wraparound) and
 * shrink the buffered count. */
397 b->read_position = (b->read_position + copy) % b->buffer_length;
398 b->data_length -= copy;
/* Append up to @bytes bytes from @buffer to the end of FIFO @b and account
 * for the bytes actually stored. */
404 pseudo_tcp_fifo_write (PseudoTcpFifo *b, const guint8 *buffer, gsize bytes)
408 copy = pseudo_tcp_fifo_write_offset (b, buffer, bytes, 0);
409 b->data_length += copy;
415 //////////////////////////////////////////////////////////////////////
417 //////////////////////////////////////////////////////////////////////
419 /* Only used if FIN-ACK support is disabled. */
/* NOTE(review): the enclosing struct header is not visible here. These look
 * like the conversation, sequence and acknowledgment numbers from the header
 * layout above — confirm against the full definition. */
436 guint32 conv, seq, ack;
/* NOTE(review): presumably the "timestamp sending" / "timestamp receiving"
 * header words — confirm. */
441 guint32 tsval, tsecr;
456 * @CLOSEDOWN_LOCAL: Error detected locally, or connection forcefully closed
458 * @CLOSEDOWN_REMOTE: RST segment received from the peer.
460 * Reasons for calling closedown().
/* Per-socket private state. It is allocated with g_new0() in
 * pseudo_tcp_socket_init() and reached through PseudoTcpSocket::priv. */
470 struct _PseudoTcpSocketPrivate {
/* User callbacks, copied in through the "callbacks" property. */
471 PseudoTcpCallbacks callbacks;
473 Shutdown shutdown; /* only used if !support_fin_ack */
474 gboolean shutdown_reads;
478 PseudoTcpState state;
480 gboolean bReadEnable, bWriteEnable, bOutgoing;
481 guint32 last_traffic;
/* Receive side: buffer size, next expected sequence number, advertised
 * window, and time of last receive. */
485 guint32 rbuf_len, rcv_nxt, rcv_wnd, lastrecv;
486 guint8 rwnd_scale; // Window scale factor
488 guint32 rcv_fin; /* sequence number of the received FIN octet, or 0 */
/* Send side: buffer size, next sequence number to send, peer window, and
 * time of last send. */
493 guint32 sbuf_len, snd_nxt, snd_wnd, lastsend;
494 guint32 snd_una; /* oldest unacknowledged sequence number */
495 guint8 swnd_scale; // Window scale factor
498 // Maximum segment size, estimated protocol level, largest segment sent
499 guint32 mss, msslevel, largest, mtu_advise;
503 // Timestamp tracking
504 guint32 ts_recent, ts_lastack;
506 // Round-trip calculation
507 guint32 rx_rttvar, rx_srtt, rx_rto;
509 // Congestion avoidance, Fast retransmit/recovery, Delayed ACKs
510 guint32 ssthresh, cwnd;
513 gboolean fast_recovery;
514 guint32 t_ack; /* time a delayed ack was scheduled; 0 if no acks scheduled */
515 guint32 last_acked_ts;
/* Inverse of the "no-delay" property. */
517 gboolean use_nagling;
520 // This is used by unit tests to test backward compatibility of
521 // PseudoTcp implementations that don't support window scaling.
522 gboolean support_wnd_scale;
524 /* Current time. Typically only used for testing, when non-zero. When zero,
525 * the system monotonic clock is used. Units: monotonic milliseconds. */
526 guint32 current_time;
528 /* This is used by compatible implementations (with the TCP_OPT_FIN_ACK
529 * option) to enable correct FIN-ACK connection termination. Defaults to
530 * TRUE unless no compatible option is received. */
531 gboolean support_fin_ack;
/* Wraparound-aware comparisons based on unsigned subtraction: @a counts as
 * larger than @b when (a - b), taken modulo the unsigned width, lies in the
 * lower half of the 32-bit range. Arguments are expected to be 32-bit
 * unsigned values (e.g. sequence numbers). */
534 #define LARGER(a,b) (((a) - (b) - 1) < (G_MAXUINT32 >> 1))
535 #define LARGER_OR_EQUAL(a,b) (((a) - (b)) < (G_MAXUINT32 >> 1))
/* The "smaller" forms are the "larger" forms with the operands swapped. */
536 #define SMALLER(a,b) LARGER ((b),(a))
537 #define SMALLER_OR_EQUAL(a,b) LARGER_OR_EQUAL ((b),(a))
/* GObject property identifiers, starting at 1; installed in
 * pseudo_tcp_socket_class_init(). Intermediate entries are not visible here. */
542 PROP_CONVERSATION = 1,
549 PROP_SUPPORT_FIN_ACK,
/* Forward declarations: GObject vfuncs. */
554 static void pseudo_tcp_socket_get_property (GObject *object, guint property_id,
555 GValue *value, GParamSpec *pspec);
556 static void pseudo_tcp_socket_set_property (GObject *object, guint property_id,
557 const GValue *value, GParamSpec *pspec);
558 static void pseudo_tcp_socket_finalize (GObject *object);
/* Forward declarations: internal protocol machinery. */
561 static void queue_connect_message (PseudoTcpSocket *self);
562 static guint32 queue (PseudoTcpSocket *self, const gchar *data,
563 guint32 len, TcpFlags flags);
564 static PseudoTcpWriteResult packet(PseudoTcpSocket *self, guint32 seq,
565 TcpFlags flags, guint32 offset, guint32 len, guint32 now);
566 static gboolean parse (PseudoTcpSocket *self,
567 const guint8 *_header_buf, gsize header_buf_len,
568 const guint8 *data_buf, gsize data_buf_len);
569 static gboolean process(PseudoTcpSocket *self, Segment *seg);
570 static int transmit(PseudoTcpSocket *self, SSegment *sseg, guint32 now);
571 static void attempt_send(PseudoTcpSocket *self, SendFlags sflags);
572 static void closedown (PseudoTcpSocket *self, guint32 err,
573 ClosedownSource source);
574 static void adjustMTU(PseudoTcpSocket *self);
575 static void parse_options (PseudoTcpSocket *self, const guint8 *data,
577 static void resize_send_buffer (PseudoTcpSocket *self, guint32 new_size);
578 static void resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size);
/* Forward declarations: state transitions. */
579 static void set_state (PseudoTcpSocket *self, PseudoTcpState new_state);
580 static void set_state_established (PseudoTcpSocket *self);
581 static void set_state_closed (PseudoTcpSocket *self, guint32 err);
/* Forward declarations: state inspection helpers. */
583 static const gchar *pseudo_tcp_state_get_name (PseudoTcpState state);
584 static gboolean pseudo_tcp_state_has_sent_fin (PseudoTcpState state);
585 static gboolean pseudo_tcp_state_has_received_fin (PseudoTcpState state);
586 static gboolean pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state);
588 // The following logging is for detailed (packet-level) pseudotcp analysis only.
/* File-wide verbosity threshold read by the DEBUG() macro; logging is off by
 * default. */
589 static PseudoTcpDebugLevel debug_level = PSEUDO_TCP_DEBUG_NONE;
/* Log a debug message when the file-wide debug_level is at least @level.
 * Messages at PSEUDO_TCP_DEBUG_NORMAL go to the "libnice-pseudotcp" log
 * domain; any other level goes to "libnice-pseudotcp-verbose". Each message
 * is prefixed with the socket pointer and its current state name.
 *
 * The macro expects a `PseudoTcpSocket *self` to be in scope at the call
 * site. It is wrapped in do { } while (0) so that it behaves as a single
 * statement and cannot capture a following `else`. */
#define DEBUG(level, fmt, ...) \
  do { \
    if (debug_level >= (level)) \
      g_log ((level) == PSEUDO_TCP_DEBUG_NORMAL ? "libnice-pseudotcp" : "libnice-pseudotcp-verbose", \
          G_LOG_LEVEL_DEBUG, "PseudoTcpSocket %p %s: " fmt, \
          self, pseudo_tcp_state_get_name (self->priv->state), ## __VA_ARGS__); \
  } while (0)
/* Set the pseudo-TCP debug verbosity to @level.
 * NOTE(review): the body is not visible here; presumably it stores @level in
 * the file-wide debug_level — confirm. */
597 pseudo_tcp_set_debug_level (PseudoTcpDebugLevel level)
/* Return the current time in milliseconds for @socket: the overridden
 * current_time if one was set (non-zero), otherwise the GLib monotonic clock
 * converted from microseconds. */
603 get_current_time (PseudoTcpSocket *socket)
605 if (G_UNLIKELY (socket->priv->current_time != 0))
606 return socket->priv->current_time;
608 return g_get_monotonic_time () / 1000;
/* Override the clock used by @self with @current_time (milliseconds).
 * Passing 0 restores use of the system monotonic clock, as
 * get_current_time() treats 0 as "no override". */
612 pseudo_tcp_socket_set_time (PseudoTcpSocket *self, guint32 current_time)
614 self->priv->current_time = current_time;
/* Class initialiser: hooks up the property accessors and finalizer, then
 * installs the GObject properties of PseudoTcpSocket. */
618 pseudo_tcp_socket_class_init (PseudoTcpSocketClass *cls)
620 GObjectClass *object_class = G_OBJECT_CLASS (cls);
622 object_class->get_property = pseudo_tcp_socket_get_property;
623 object_class->set_property = pseudo_tcp_socket_set_property;
624 object_class->finalize = pseudo_tcp_socket_finalize;
/* "conversation": construct-only conversation ID. */
626 g_object_class_install_property (object_class, PROP_CONVERSATION,
627 g_param_spec_uint ("conversation", "TCP Conversation ID",
628 "The TCP Conversation ID",
630 G_PARAM_CONSTRUCT_ONLY | G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
/* "callbacks": pointer to a PseudoTcpCallbacks structure; the setter copies
 * the structure. */
632 g_object_class_install_property (object_class, PROP_CALLBACKS,
633 g_param_spec_pointer ("callbacks", "PseudoTcp socket callbacks",
634 "Structure with the callbacks to call when PseudoTcp events happen",
635 G_PARAM_READWRITE | G_PARAM_STATIC_STRINGS));
/* "state": read-only, exposed as an unsigned integer. */
637 g_object_class_install_property (object_class, PROP_STATE,
638 g_param_spec_uint ("state", "PseudoTcp State",
639 "The current state (enum PseudoTcpState) of the PseudoTcp socket",
640 PSEUDO_TCP_LISTEN, PSEUDO_TCP_CLOSED, PSEUDO_TCP_LISTEN,
641 G_PARAM_READABLE | G_PARAM_STATIC_STRINGS));
643 g_object_class_install_property (object_class, PROP_ACK_DELAY,
644 g_param_spec_uint ("ack-delay", "ACK Delay",
645 "Delayed ACK timeout (in milliseconds)",
646 0, G_MAXUINT, DEFAULT_ACK_DELAY,
647 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
/* "no-delay": stored inverted as use_nagling. */
649 g_object_class_install_property (object_class, PROP_NO_DELAY,
650 g_param_spec_boolean ("no-delay", "No Delay",
651 "Disable the Nagle algorithm (like the TCP_NODELAY option)",
653 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
/* "rcv-buf" / "snd-buf": buffer sizes; the setter only accepts changes while
 * the socket is still in the LISTEN state. */
655 g_object_class_install_property (object_class, PROP_RCV_BUF,
656 g_param_spec_uint ("rcv-buf", "Receive Buffer",
657 "Receive Buffer size",
658 1, G_MAXUINT, DEFAULT_RCV_BUF_SIZE,
659 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
661 g_object_class_install_property (object_class, PROP_SND_BUF,
662 g_param_spec_uint ("snd-buf", "Send Buffer",
664 1, G_MAXUINT, DEFAULT_SND_BUF_SIZE,
665 G_PARAM_READWRITE| G_PARAM_STATIC_STRINGS));
668 * PseudoTcpSocket:support-fin-ack:
670 * Whether to support the FIN–ACK extension to the pseudo-TCP protocol for
671 * this socket. The extension is only compatible with other libnice pseudo-TCP
672 * stacks, and not with Jingle pseudo-TCP stacks. If enabled, support is
673 * negotiated on connection setup, so it is safe for a #PseudoTcpSocket with
674 * support enabled to be used with one with it disabled, or with a Jingle
675 * pseudo-TCP socket which doesn’t support it at all.
677 * Support is enabled by default.
681 g_object_class_install_property (object_class, PROP_SUPPORT_FIN_ACK,
682 g_param_spec_boolean ("support-fin-ack", "Support FIN–ACK",
683 "Whether to enable the optional FIN–ACK support.",
685 G_PARAM_READWRITE | G_PARAM_CONSTRUCT_ONLY | G_PARAM_STATIC_STRINGS));
/* GObject get_property implementation: copies the requested field of the
 * socket's private state into @value, and warns on an unknown property ID.
 * Several `case` labels and `break` statements are not visible here. */
690 pseudo_tcp_socket_get_property (GObject *object,
695 PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
697 switch (property_id) {
698 case PROP_CONVERSATION:
699 g_value_set_uint (value, self->priv->conv);
/* Hands out a pointer into the private struct rather than a copy. */
702 g_value_set_pointer (value, (gpointer) &self->priv->callbacks);
705 g_value_set_uint (value, self->priv->state);
708 g_value_set_uint (value, self->priv->ack_delay);
/* The property is "no-delay", so the stored Nagle flag is inverted. */
711 g_value_set_boolean (value, !self->priv->use_nagling);
714 g_value_set_uint (value, self->priv->rbuf_len);
717 g_value_set_uint (value, self->priv->sbuf_len);
719 case PROP_SUPPORT_FIN_ACK:
720 g_value_set_boolean (value, self->priv->support_fin_ack);
723 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
/* GObject set_property implementation: stores @value into the matching field
 * of the socket's private state, and warns on an unknown property ID.
 * Several `case` labels and `break` statements are not visible here. */
729 pseudo_tcp_socket_set_property (GObject *object,
734 PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
736 switch (property_id) {
737 case PROP_CONVERSATION:
738 self->priv->conv = g_value_get_uint (value);
/* The callbacks structure is copied by value. NOTE(review): the pointer is
 * dereferenced without a NULL check in the visible lines. */
742 PseudoTcpCallbacks *c = g_value_get_pointer (value);
743 self->priv->callbacks = *c;
747 self->priv->ack_delay = g_value_get_uint (value);
/* The property is "no-delay", so it is stored inverted. */
750 self->priv->use_nagling = !g_value_get_boolean (value);
/* Buffer sizes may only be changed while the socket is in the LISTEN
 * state. */
753 g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
754 resize_receive_buffer (self, g_value_get_uint (value));
757 g_return_if_fail (self->priv->state == PSEUDO_TCP_LISTEN);
758 resize_send_buffer (self, g_value_get_uint (value));
760 case PROP_SUPPORT_FIN_ACK:
761 self->priv->support_fin_ack = g_value_get_boolean (value);
764 G_OBJECT_WARN_INVALID_PROPERTY_ID (object, property_id, pspec);
/* GObject finalizer: frees the queued send segments, the received-segment
 * list and both FIFOs, then chains up to the parent class. */
770 pseudo_tcp_socket_finalize (GObject *object)
772 PseudoTcpSocket *self = PSEUDO_TCP_SOCKET (object);
773 PseudoTcpSocketPrivate *priv = self->priv;
/* Free every send segment in slist. */
780 while ((sseg = g_queue_pop_head (&priv->slist)))
781 g_slice_free (SSegment, sseg);
/* unsent_slist only has its links cleared; no element is freed here. */
782 g_queue_clear (&priv->unsent_slist);
/* Free each received segment, then the list nodes themselves. */
783 for (i = priv->rlist; i; i = i->next) {
784 RSegment *rseg = i->data;
785 g_slice_free (RSegment, rseg);
787 g_list_free (priv->rlist);
790 pseudo_tcp_fifo_clear (&priv->rbuf);
791 pseudo_tcp_fifo_clear (&priv->sbuf);
/* Chain up to the parent class finalizer, if it has one. */
796 if (G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize)
797 G_OBJECT_CLASS (pseudo_tcp_socket_parent_class)->finalize (object);
/* Instance initialiser: allocates the zero-filled private structure and sets
 * the protocol state to its defaults (LISTEN state, default buffer sizes,
 * default RTO and ACK delay, window scaling and FIN-ACK support enabled). */
802 pseudo_tcp_socket_init (PseudoTcpSocket *obj)
804 /* Use g_new0, and do not use g_object_set_private because the size of
805 * our private data is too big (150KB+) and the g_slice_alloc cannot allocate
806 * it. So we handle the private ourselves */
807 PseudoTcpSocketPrivate *priv = g_new0 (PseudoTcpSocketPrivate, 1);
811 priv->shutdown = SD_NONE;
/* Receive and send FIFOs start at their default capacities. */
814 priv->rbuf_len = DEFAULT_RCV_BUF_SIZE;
815 pseudo_tcp_fifo_init (&priv->rbuf, priv->rbuf_len);
816 priv->sbuf_len = DEFAULT_SND_BUF_SIZE;
817 pseudo_tcp_fifo_init (&priv->sbuf, priv->sbuf_len);
819 priv->state = PSEUDO_TCP_LISTEN;
821 g_queue_init (&priv->slist);
822 g_queue_init (&priv->unsent_slist);
/* Advertise the whole receive buffer initially, with no window scaling. */
823 priv->rcv_wnd = priv->rbuf_len;
824 priv->rwnd_scale = priv->swnd_scale = 0;
827 priv->snd_una = priv->rcv_nxt = 0;
828 priv->bReadEnable = TRUE;
829 priv->bWriteEnable = FALSE;
/* Start from the smallest packet size, less the per-packet overhead. */
836 priv->mss = MIN_PACKET - PACKET_OVERHEAD;
837 priv->mtu_advise = DEF_MTU;
/* Initial congestion window of two segments; slow-start threshold equal to
 * the receive buffer size. */
841 priv->cwnd = 2 * priv->mss;
842 priv->ssthresh = priv->rbuf_len;
843 priv->lastrecv = priv->lastsend = priv->last_traffic = 0;
844 priv->bOutgoing = FALSE;
848 priv->last_acked_ts = 0;
850 priv->ts_recent = priv->ts_lastack = 0;
852 priv->rx_rto = DEF_RTO;
853 priv->rx_srtt = priv->rx_rttvar = 0;
855 priv->ack_delay = DEFAULT_ACK_DELAY;
856 priv->use_nagling = !DEFAULT_NO_DELAY;
858 priv->support_wnd_scale = TRUE;
859 priv->support_fin_ack = TRUE;
/* Create a new PseudoTcpSocket with the given @conversation ID and
 * @callbacks, by setting the "conversation" and "callbacks" properties at
 * construction. The callbacks structure is copied by the property setter. */
862 PseudoTcpSocket *pseudo_tcp_socket_new (guint32 conversation,
863 PseudoTcpCallbacks *callbacks)
866 return g_object_new (PSEUDO_TCP_SOCKET_TYPE,
867 "conversation", conversation,
868 "callbacks", callbacks,
/* Build the connect control message (CTL_CONNECT followed by the optional
 * window-scale and FIN-ACK options) and queue it as a FLAG_CTL segment. */
873 queue_connect_message (PseudoTcpSocket *self)
875 PseudoTcpSocketPrivate *priv = self->priv;
879 buf[size++] = CTL_CONNECT;
/* Window scale option: kind, (length, not visible here), scale factor. */
881 if (priv->support_wnd_scale) {
882 buf[size++] = TCP_OPT_WND_SCALE;
884 buf[size++] = priv->rwnd_scale;
/* FIN-ACK option: kind, length, one unused payload byte. */
887 if (priv->support_fin_ack) {
888 buf[size++] = TCP_OPT_FIN_ACK;
889 buf[size++] = 1; /* option length; zero is invalid (RFC 1122, §4.2.2.5) */
890 buf[size++] = 0; /* currently unused */
/* Set the send window to the size of the connect message being queued. */
893 priv->snd_wnd = size;
895 queue (self, (char *) buf, size, FLAG_CTL);
/* Queue a zero-length FIN segment. Must only be called when FIN-ACK support
 * is enabled (asserted). */
899 queue_fin_message (PseudoTcpSocket *self)
901 g_assert (self->priv->support_fin_ack);
903 /* FIN segments are always zero-length. */
904 queue (self, "", 0, FLAG_FIN);
/* Queue a zero-length RST segment. Must only be called when FIN-ACK support
 * is enabled (asserted). */
908 queue_rst_message (PseudoTcpSocket *self)
910 g_assert (self->priv->support_fin_ack);
912 /* RST segments are always zero-length. */
913 queue (self, "", 0, FLAG_RST);
/* Start an outgoing connection: move from LISTEN to SYN-SENT, queue the
 * connect message and try to send it. If the socket is not in the LISTEN
 * state, the socket error is set to EINVAL instead. */
917 pseudo_tcp_socket_connect(PseudoTcpSocket *self)
919 PseudoTcpSocketPrivate *priv = self->priv;
921 if (priv->state != PSEUDO_TCP_LISTEN) {
922 priv->error = EINVAL;
926 set_state (self, PSEUDO_TCP_SYN_SENT);
928 queue_connect_message (self);
929 attempt_send(self, sfNone);
/* Record the MTU advised by the lower layer. An extra step is taken when the
 * connection is already established.
 * NOTE(review): the body of that branch is not visible here; presumably it
 * recomputes the MSS via adjustMTU() — confirm. */
935 pseudo_tcp_socket_notify_mtu(PseudoTcpSocket *self, guint16 mtu)
937 PseudoTcpSocketPrivate *priv = self->priv;
938 priv->mtu_advise = mtu;
939 if (priv->state == PSEUDO_TCP_ESTABLISHED) {
/* Timer tick handler. In order, it handles: TIME-WAIT expiry, resending the
 * FIN in LAST-ACK, retransmission timeouts (with congestion window reset and
 * RTO back-off), zero-window probing, and sending delayed ACKs. */
945 pseudo_tcp_socket_notify_clock(PseudoTcpSocket *self)
947 PseudoTcpSocketPrivate *priv = self->priv;
948 guint32 now = get_current_time (self);
950 if (priv->state == PSEUDO_TCP_CLOSED)
953 /* If in the TIME-WAIT state, any delayed segments have passed and the
954 * connection can be considered closed from both ends.
955 * FIXME: This should probably actually compare a timestamp before
957 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
958 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
959 "Notified clock in TIME-WAIT state; closing connection.");
960 set_state_closed (self, 0);
963 /* If in the LAST-ACK state, resend the FIN because it hasn’t been ACKed yet.
964 * FIXME: This should probably actually compare a timestamp before
966 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_LAST_ACK) {
967 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
968 "Notified clock in LAST-ACK state; resending FIN segment.");
969 queue_fin_message (self);
970 attempt_send (self, sfFin);
973 // Check if it's time to retransmit a segment
974 if (priv->rto_base &&
975 (time_diff(priv->rto_base + priv->rx_rto, now) <= 0)) {
/* A running retransmit timer with nothing queued is treated as a
 * programming error. */
976 if (g_queue_get_length (&priv->slist) == 0) {
977 g_assert_not_reached ();
979 // Note: (priv->slist.front().xmit == 0)) {
980 // retransmit segments
985 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "timeout retransmit (rto: %u) "
986 "(rto_base: %u) (now: %u) (dup_acks: %u)",
987 priv->rx_rto, priv->rto_base, now, (guint) priv->dup_acks);
/* Retransmit the oldest queued segment; a failure closes the connection. */
989 transmit_status = transmit(self, g_queue_peek_head (&priv->slist), now);
990 if (transmit_status != 0) {
991 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
992 "Error transmitting segment. Closing down.");
993 closedown (self, transmit_status, CLOSEDOWN_LOCAL);
/* Timeout response: set ssthresh to half the data in flight, but at least
 * two segments, and shrink the congestion window to one segment. */
997 nInFlight = priv->snd_nxt - priv->snd_una;
998 priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
999 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "ssthresh: %u = (nInFlight: %u / 2) + "
1000 "2 * mss: %u", priv->ssthresh, nInFlight, priv->mss);
1001 //LOG(LS_INFO) << "priv->ssthresh: " << priv->ssthresh << " nInFlight: " << nInFlight << " priv->mss: " << priv->mss;
1002 priv->cwnd = priv->mss;
1004 // Back off retransmit timer. Note: the limit is lower when connecting.
1005 rto_limit = (priv->state < PSEUDO_TCP_ESTABLISHED) ? DEF_RTO : MAX_RTO;
1006 priv->rx_rto = min(rto_limit, priv->rx_rto * 2);
1007 priv->rto_base = now;
/* A timeout ends any fast-recovery episode that was in progress. */
1009 priv->recover = priv->snd_nxt;
1010 if (priv->dup_acks >= 3) {
1012 priv->fast_recovery = FALSE;
1013 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery on timeout");
1018 // Check if it's time to probe closed windows
1019 if ((priv->snd_wnd == 0)
1020 && (time_diff(priv->lastsend + priv->rx_rto, now) <= 0)) {
/* Give up if nothing has been received for 15 seconds. */
1021 if (time_diff(now, priv->lastrecv) >= 15000) {
1022 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Receive window closed. Closing down.");
1023 closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
/* Window probe: an empty packet with sequence number snd_nxt - 1. */
1028 packet(self, priv->snd_nxt - 1, 0, 0, 0, now);
1029 priv->lastsend = now;
1031 // back off retransmit timer
1032 priv->rx_rto = min(MAX_RTO, priv->rx_rto * 2);
1035 // Check if it's time to send delayed acks
1036 if (priv->t_ack && (time_diff(priv->t_ack + priv->ack_delay, now) <= 0)) {
1037 packet(self, priv->snd_nxt, 0, 0, 0, now);
/* Feed a received packet of @len bytes at @buffer into the socket. Packets
 * larger than MAX_PACKET set the socket error to EMSGSIZE; packets shorter
 * than HEADER_SIZE set it to EINVAL. Otherwise the first HEADER_SIZE bytes
 * are parsed as the header and the rest as payload. */
1043 pseudo_tcp_socket_notify_packet(PseudoTcpSocket *self,
1044 const gchar * buffer, guint32 len)
1048 if (len > MAX_PACKET) {
1049 //LOG_F(WARNING) << "packet too large";
1050 self->priv->error = EMSGSIZE;
1052 } else if (len < HEADER_SIZE) {
1053 //LOG_F(WARNING) << "packet too small";
1054 self->priv->error = EINVAL;
1058 /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
1059 * closed from within a callback. */
1060 g_object_ref (self);
1061 retval = parse (self, (guint8 *) buffer, HEADER_SIZE,
1062 (guint8 *) buffer + HEADER_SIZE, len - HEADER_SIZE);
1063 g_object_unref (self);
/* Feed a received #NiceInputMessage into the socket. A single-buffer message
 * is forwarded to pseudo_tcp_socket_notify_packet(); otherwise exactly two
 * buffers are required (asserted), the first holding exactly the header. */
1068 /* Assume there are two buffers in the given #NiceInputMessage: a 24-byte one
1069 * containing the header, and a bigger one for the data. */
1071 pseudo_tcp_socket_notify_message (PseudoTcpSocket *self,
1072 NiceInputMessage *message)
1076 g_assert_cmpuint (message->n_buffers, >, 0);
1078 if (message->n_buffers == 1)
1079 return pseudo_tcp_socket_notify_packet (self, message->buffers[0].buffer,
1080 message->buffers[0].size);
1082 g_assert_cmpuint (message->n_buffers, ==, 2);
1083 g_assert_cmpuint (message->buffers[0].size, ==, HEADER_SIZE);
/* Same size limits as pseudo_tcp_socket_notify_packet(), applied to the
 * total message length. */
1085 if (message->length > MAX_PACKET) {
1086 //LOG_F(WARNING) << "packet too large";
1088 } else if (message->length < HEADER_SIZE) {
1089 //LOG_F(WARNING) << "packet too small";
1093 /* Hold a reference to the PseudoTcpSocket during parsing, since it may be
1094 * closed from within a callback. */
1095 g_object_ref (self);
/* The payload length is the total length minus the header buffer's size. */
1096 retval = parse (self, message->buffers[0].buffer, message->buffers[0].size,
1097 message->buffers[1].buffer, message->length - message->buffers[0].size);
1098 g_object_unref (self);
/* Compute into *@timeout the time (milliseconds, same clock as
 * get_current_time()) by which the socket next needs attention. It is the
 * earliest of the default/closed/TIME-WAIT deadline, the delayed-ACK
 * deadline, the retransmission deadline and the zero-window probe deadline.
 * When FIN-ACK is not in use, a pending shutdown is also completed here.
 * NOTE(review): the return statements are not visible here. */
1104 pseudo_tcp_socket_get_next_clock(PseudoTcpSocket *self, guint64 *timeout)
1106 PseudoTcpSocketPrivate *priv = self->priv;
1107 guint32 now = get_current_time (self);
1109 guint32 closed_timeout;
1111 if (priv->shutdown == SD_FORCEFUL) {
1112 if (priv->support_fin_ack) {
1113 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1114 "‘Forceful’ shutdown used when FIN-ACK support is enabled");
1117 /* Transition to the CLOSED state. */
1118 closedown (self, 0, CLOSEDOWN_REMOTE);
/* A graceful shutdown completes once the socket is no longer established,
 * or once nothing is left to send and no ACK is pending. */
1123 snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1124 if ((priv->shutdown == SD_GRACEFUL)
1125 && ((priv->state != PSEUDO_TCP_ESTABLISHED)
1126 || ((snd_buffered == 0) && (priv->t_ack == 0)))) {
1127 if (priv->support_fin_ack) {
1128 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1129 "‘Graceful’ shutdown used when FIN-ACK support is enabled");
1132 /* Transition to the CLOSED state. */
1133 closedown (self, 0, CLOSEDOWN_REMOTE);
1138 /* FIN-ACK support. The timeout for closing the socket if nothing is received
1139 * varies depending on whether the socket is waiting in the TIME-WAIT state
1140 * for delayed segments to pass.
1142 * See: http://vincent.bernat.im/en/blog/2014-tcp-time-wait-state-linux.html
1144 closed_timeout = CLOSED_TIMEOUT;
1145 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT)
1146 closed_timeout = TIME_WAIT_TIMEOUT;
1148 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_CLOSED) {
/* An unset (0) or already-passed timeout is replaced by a fresh deadline. */
1152 if (*timeout == 0 || *timeout < now)
1153 *timeout = now + closed_timeout;
1155 if (priv->support_fin_ack && priv->state == PSEUDO_TCP_TIME_WAIT) {
1156 *timeout = min (*timeout, now + TIME_WAIT_TIMEOUT);
1160 if (priv->state == PSEUDO_TCP_CLOSED && !priv->support_fin_ack) {
1161 *timeout = min (*timeout, now + CLOSED_TIMEOUT);
/* From here on the deadline is only ever pulled earlier. */
1165 *timeout = min (*timeout, now + DEFAULT_TIMEOUT);
1168 *timeout = min(*timeout, priv->t_ack + priv->ack_delay);
1170 if (priv->rto_base) {
1171 *timeout = min(*timeout, priv->rto_base + priv->rx_rto);
1173 if (priv->snd_wnd == 0) {
1174 *timeout = min(*timeout, priv->lastsend + priv->rx_rto);
/* Read up to @len bytes of received data into @buffer. When no data is
 * available and no FIN has been received, the socket error is set to
 * EWOULDBLOCK and read notification is re-enabled. After a successful read,
 * the receive window is reopened if enough space has been freed.
 * NOTE(review): the return statements are not visible here. */
1182 pseudo_tcp_socket_recv(PseudoTcpSocket *self, char * buffer, size_t len)
1184 PseudoTcpSocketPrivate *priv = self->priv;
1186 gsize available_space;
1188 /* Received a FIN from the peer, so return 0. RFC 793, §3.5, Case 2. */
1189 if (priv->support_fin_ack && priv->shutdown_reads) {
1193 /* Return 0 if FIN-ACK is not supported but the socket has been closed. */
1194 if (!priv->support_fin_ack && pseudo_tcp_socket_is_closed (self)) {
1198 /* Return ENOTCONN if FIN-ACK is not supported and the connection is not
1200 if (!priv->support_fin_ack && priv->state != PSEUDO_TCP_ESTABLISHED) {
1201 priv->error = ENOTCONN;
1208 bytesread = pseudo_tcp_fifo_read (&priv->rbuf, (guint8 *) buffer, len);
1210 // If there's no data in |m_rbuf|.
1211 if (bytesread == 0 &&
1212 !(pseudo_tcp_state_has_received_fin (priv->state) ||
1213 pseudo_tcp_state_has_received_fin_ack (priv->state))) {
1214 priv->bReadEnable = TRUE;
1215 priv->error = EWOULDBLOCK;
1219 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
/* Only update the advertised window once the newly freed space is at least
 * one MSS or half the receive buffer, whichever is smaller. */
1221 if (available_space - priv->rcv_wnd >=
1222 min (priv->rbuf_len / 2, priv->mss)) {
1223 // !?! Not sure about this was closed business
1224 gboolean bWasClosed = (priv->rcv_wnd == 0);
1226 priv->rcv_wnd = available_space;
1229 attempt_send(self, sfImmediateAck);
// Queues up to |len| bytes from |buffer| as plain data (FLAG_NONE) and then
// calls attempt_send().
//
// The error paths visible here only record a code in priv->error:
//  - state is not ESTABLISHED: EPIPE if the state implies a FIN was already
//    sent, otherwise ENOTCONN;
//  - the send FIFO (priv->sbuf) has no free space: EWOULDBLOCK, with
//    bWriteEnable set.
// A partial write (0 < written < len) also sets bWriteEnable. process()
// checks that flag before invoking the PseudoTcpWritable callback.
//
// NOTE(review): the declaration of written and all return statements are
// not visible in this listing; presumably -1 on error and the number of
// bytes queued otherwise -- confirm against the full file.
1237 pseudo_tcp_socket_send(PseudoTcpSocket *self, const char * buffer, guint32 len)
1239 PseudoTcpSocketPrivate *priv = self->priv;
1241 gsize available_space;
1243 if (priv->state != PSEUDO_TCP_ESTABLISHED) {
1244 priv->error = pseudo_tcp_state_has_sent_fin (priv->state) ? EPIPE : ENOTCONN;
1248 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
1250 if (!available_space) {
1251 priv->bWriteEnable = TRUE;
1252 priv->error = EWOULDBLOCK;
1256 written = queue (self, buffer, len, FLAG_NONE);
1257 attempt_send(self, sfNone);
// queue() may accept fewer bytes than requested when sbuf is nearly full.
1259 if (written > 0 && (guint32)written < len) {
1260 priv->bWriteEnable = TRUE;
// Closes the socket, either forcefully or gracefully depending on |force|.
//
// With |force| set and the socket not already CLOSED, closedown() is called
// with ECONNABORTED as a local closedown. The other visible path is a call
// to pseudo_tcp_socket_shutdown() for both directions (RDWR).
//
// NOTE(review): whether the forced branch returns before the shutdown()
// fallback is not visible in this listing -- confirm against the full file.
1267 pseudo_tcp_socket_close(PseudoTcpSocket *self, gboolean force)
1269 PseudoTcpSocketPrivate *priv = self->priv;
1271 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing socket %p %s", self,
1272 force ? "forcefully" : "gracefully");
1274 /* Forced closure by sending an RST segment. RFC 1122, §4.2.2.13. */
1275 if (force && priv->state != PSEUDO_TCP_CLOSED) {
1276 closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1280 /* Fall back to shutdown(). */
1281 pseudo_tcp_socket_shutdown (self, PSEUDO_TCP_SHUTDOWN_RDWR);
// Shuts down the read side, the write side, or both, as selected by |how|.
//
// Without FIN-ACK support, the only visible effect is that priv->shutdown
// goes from SD_NONE to SD_GRACEFUL. With FIN-ACK support, RD and RDWR set
// priv->shutdown_reads, and the write side is closed according to the
// current state:
//  - LISTEN, SYN_SENT: set_state_closed() without finishing the handshake;
//  - SYN_RECEIVED, ESTABLISHED: closedown() with ECONNABORTED when received
//    data is still pending (per the inline comment, RST instead of FIN);
//    the other visible path queues and sends a FIN and moves to FIN_WAIT_1;
//  - CLOSE_WAIT: a FIN is queued and sent and the state becomes LAST_ACK;
//  - the remaining states have no visible action.
//
// NOTE(review): the switch on |how|, the early returns, the else and the
// break statements are not visible in this listing -- confirm the exact
// control flow against the full file.
1285 pseudo_tcp_socket_shutdown (PseudoTcpSocket *self, PseudoTcpShutdown how)
1287 PseudoTcpSocketPrivate *priv = self->priv;
1289 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Shutting down socket %p: %u", self, how);
1291 /* FIN-ACK--only stuff below here. */
1292 if (!priv->support_fin_ack) {
1293 if (priv->shutdown == SD_NONE)
1294 priv->shutdown = SD_GRACEFUL;
1298 /* What needs shutting down? */
1300 case PSEUDO_TCP_SHUTDOWN_RD:
1301 case PSEUDO_TCP_SHUTDOWN_RDWR:
1302 priv->shutdown_reads = TRUE;
1304 case PSEUDO_TCP_SHUTDOWN_WR:
1305 /* Handled below. */
1308 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid shutdown method: %u.", how);
// A read-only shutdown needs no write-side work.
1312 if (how == PSEUDO_TCP_SHUTDOWN_RD) {
1316 /* Unforced write closure. */
1317 switch (priv->state) {
1318 case PSEUDO_TCP_LISTEN:
1319 case PSEUDO_TCP_SYN_SENT:
1320 /* Just abort the connection without completing the handshake. */
1321 set_state_closed (self, 0);
1323 case PSEUDO_TCP_SYN_RECEIVED:
1324 case PSEUDO_TCP_ESTABLISHED:
1325 /* Local user initiating the close: RFC 793, §3.5, Cases 1 and 3.
1326 * If there is pending receive data, send RST instead of FIN;
1327 * see RFC 1122, §4.2.2.13. */
1328 if (pseudo_tcp_socket_get_available_bytes (self) > 0) {
1329 closedown (self, ECONNABORTED, CLOSEDOWN_LOCAL);
1331 queue_fin_message (self);
1332 attempt_send (self, sfFin);
1333 set_state (self, PSEUDO_TCP_FIN_WAIT_1);
1336 case PSEUDO_TCP_CLOSE_WAIT:
1337 /* Remote user initiating the close: RFC 793, §3.5, Case 2.
1338 * We’ve previously received a FIN from the peer; now the user is closing
1339 * the local end of the connection. */
1340 queue_fin_message (self);
1341 attempt_send (self, sfFin);
1342 set_state (self, PSEUDO_TCP_LAST_ACK);
1344 case PSEUDO_TCP_CLOSING:
1345 case PSEUDO_TCP_CLOSED:
1346 /* Already closed on both sides. */
1348 case PSEUDO_TCP_FIN_WAIT_1:
1349 case PSEUDO_TCP_FIN_WAIT_2:
1350 case PSEUDO_TCP_TIME_WAIT:
1351 case PSEUDO_TCP_LAST_ACK:
1352 /* Already closed locally. */
// Accessor for the socket's error state.
// NOTE(review): only the priv lookup is visible in this listing; presumably
// this returns priv->error, the field that recv() and send() assign errno
// style codes to -- confirm against the full file.
1361 pseudo_tcp_socket_get_error(PseudoTcpSocket *self)
1363 PseudoTcpSocketPrivate *priv = self->priv;
1368 // Internal Implementation
// Appends |len| bytes of |data| to the send FIFO (priv->sbuf) and records
// them in the send segment list.
//
// |flags| tags the segment (plain data is FLAG_NONE). If |len| exceeds the
// free space in sbuf it is clamped to that space; the assertion means only
// FLAG_NONE data may be truncated this way.
//
// The bytes are either merged into the tail segment of priv->slist (same
// flags, never transmitted) or described by a new SSegment whose sequence
// number is snd_una plus the bytes already buffered. A new segment is pushed
// onto both slist and unsent_slist.
//
// Returns the result of pseudo_tcp_fifo_write().
// NOTE(review): the else keyword and the assignment of sseg->len are not
// visible in this listing -- confirm against the full file.
1372 queue (PseudoTcpSocket *self, const gchar * data, guint32 len, TcpFlags flags)
1374 PseudoTcpSocketPrivate *priv = self->priv;
1375 gsize available_space;
1377 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
1378 if (len > available_space) {
1379 g_assert_cmpint (flags, ==, FLAG_NONE);
1380 len = available_space;
1383 // We can concatenate data if the last segment is the same type
1384 // (control v. regular data), and has not been transmitted yet
1385 if (g_queue_get_length (&priv->slist) &&
1386 (((SSegment *)g_queue_peek_tail (&priv->slist))->flags == flags) &&
1387 (((SSegment *)g_queue_peek_tail (&priv->slist))->xmit == 0)) {
1388 ((SSegment *)g_queue_peek_tail (&priv->slist))->len += len;
1390 SSegment *sseg = g_slice_new0 (SSegment);
1391 gsize snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
// The new segment starts right after everything already buffered.
1393 sseg->seq = priv->snd_una + snd_buffered;
1395 sseg->flags = flags;
1396 g_queue_push_tail (&priv->slist, sseg);
1397 g_queue_push_tail (&priv->unsent_slist, sseg);
1400 //LOG(LS_INFO) << "PseudoTcp::queue - priv->slen = " << priv->slen;
// NOTE(review): the doubled semicolon below is a harmless empty statement.
1401 return pseudo_tcp_fifo_write (&priv->sbuf, (guint8*) data, len);;
1404 // Creates a packet and submits it to the network. This method can either
1405 // send payload or just an ACK packet.
1407 // |seq| is the sequence number of this packet.
1408 // |flags| is the flags for sending this packet.
1409 // |offset| is the offset to read from |m_sbuf|.
1410 // |len| is the number of bytes to read from |m_sbuf| as payload. If this
1411 // value is 0 then this is an ACK packet, otherwise this packet has payload.
// |now| is the timestamp written into the header; it is also stored in
// priv->lastsend and priv->last_traffic.
//
// Header fields as written below (multi-byte values in network byte order):
//   bytes  0-3   conversation id (priv->conv)
//   bytes  4-7   sequence number (|seq|)
//   bytes  8-11  acknowledgement number (priv->rcv_nxt)
//   byte   13    flags
//   bytes 14-15  receive window, shifted right by priv->rwnd_scale
//   bytes 16-19  timestamp value (|now|)
//   bytes 20-23  timestamp echo (priv->ts_recent)
//
// The payload is copied from priv->sbuf to just after the header and the
// whole buffer is handed to the WritePacket callback.
//
// NOTE(review): the declaration of the buffer union, the write to byte 12,
// the guard around the payload copy and the return statements are not
// visible in this listing -- confirm against the full file.
1413 static PseudoTcpWriteResult
1414 packet(PseudoTcpSocket *self, guint32 seq, TcpFlags flags,
1415 guint32 offset, guint32 len, guint32 now)
1417 PseudoTcpSocketPrivate *priv = self->priv;
1419 guint8 u8[MAX_PACKET];
1420 guint16 u16[MAX_PACKET / 2];
1421 guint32 u32[MAX_PACKET / 4];
1423 PseudoTcpWriteResult wres = WR_SUCCESS;
// Header plus payload must fit into the fixed-size packet buffer.
1425 g_assert_cmpuint (HEADER_SIZE + len, <=, MAX_PACKET);
1427 *buffer.u32 = htonl(priv->conv);
1428 *(buffer.u32 + 1) = htonl(seq);
1429 *(buffer.u32 + 2) = htonl(priv->rcv_nxt);
1431 buffer.u8[13] = flags;
1432 *(buffer.u16 + 7) = htons((guint16)(priv->rcv_wnd >> priv->rwnd_scale));
1434 // Timestamp computations
1435 *(buffer.u32 + 4) = htonl(now);
1436 *(buffer.u32 + 5) = htonl(priv->ts_recent);
// Remember which acknowledgement number this packet carried.
1437 priv->ts_lastack = priv->rcv_nxt;
1442 bytes_read = pseudo_tcp_fifo_read_offset (&priv->sbuf, buffer.u8 + HEADER_SIZE,
1444 g_assert_cmpint (bytes_read, ==, len);
1447 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Sending <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1448 "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1449 priv->conv, (unsigned)flags, seq, seq + len, priv->rcv_nxt, priv->rcv_wnd,
1450 now % 10000, priv->ts_recent % 10000, len);
1452 wres = priv->callbacks.WritePacket(self, (gchar *) buffer.u8, len + HEADER_SIZE,
1453 priv->callbacks.user_data);
1454 /* Note: When len is 0, this is an ACK packet. We don't read the
1455 return value for those, and thus we won't retry. So go ahead and treat
1456 the packet as a success (basically simulate as if it were dropped),
1457 which will prevent our timers from being messed up. */
1458 if ((wres != WR_SUCCESS) && (0 != len))
1463 priv->lastsend = now;
1465 priv->last_traffic = now;
1466 priv->bOutgoing = TRUE;
// Decodes a received packet header into a Segment and passes it to
// process().
//
// |_header_buf| and |header_buf_len| describe the header; the header length
// is checked against exactly 24 bytes. |data_buf| and |data_buf_len|
// describe the payload, which is referenced by the Segment, not copied.
//
// Fields are read from the same offsets packet() writes them to: conv at
// bytes 0-3, seq at 4-7, ack at 8-11, flags at byte 13, window at 14-15,
// tsval at 16-19 and tsecr at 20-23, converted from network byte order.
//
// Returns the result of process().
// NOTE(review): the declarations of seg and header_buf, and the return
// taken when the header length check fails, are not visible in this listing
// -- confirm against the full file.
1472 parse (PseudoTcpSocket *self, const guint8 *_header_buf, gsize header_buf_len,
1473 const guint8 *data_buf, gsize data_buf_len)
1483 header_buf.u8 = _header_buf;
1485 if (header_buf_len != 24)
1488 seg.conv = ntohl(*header_buf.u32);
1489 seg.seq = ntohl(*(header_buf.u32 + 1));
1490 seg.ack = ntohl(*(header_buf.u32 + 2));
1491 seg.flags = header_buf.u8[13];
1492 seg.wnd = ntohs(*(header_buf.u16 + 7));
1494 seg.tsval = ntohl(*(header_buf.u32 + 4));
1495 seg.tsecr = ntohl(*(header_buf.u32 + 5));
1497 seg.data = (const gchar *) data_buf;
1498 seg.len = data_buf_len;
1500 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
1501 "Received <CONV=%u><FLG=%u><SEQ=%u:%u><ACK=%u>"
1502 "<WND=%u><TS=%u><TSR=%u><LEN=%u>",
1503 seg.conv, (unsigned)seg.flags, seg.seq, seg.seq + seg.len, seg.ack,
1504 seg.wnd, seg.tsval % 10000, seg.tsecr % 10000, seg.len);
1506 return process(self, &seg);
1509 /* True iff the @state requires that a FIN has already been sent by this
// Classifies |state| by whether this host must already have sent a FIN.
// The states are listed in two groups: LISTEN, SYN_SENT, SYN_RECEIVED,
// ESTABLISHED and CLOSE_WAIT first, then CLOSED, FIN_WAIT_1, FIN_WAIT_2,
// CLOSING, TIME_WAIT and LAST_ACK.
// NOTE(review): the switch header and the return statements are not visible
// in this listing; presumably the first group yields FALSE and the second
// TRUE -- confirm against the full file.
1512 pseudo_tcp_state_has_sent_fin (PseudoTcpState state)
1515 case PSEUDO_TCP_LISTEN:
1516 case PSEUDO_TCP_SYN_SENT:
1517 case PSEUDO_TCP_SYN_RECEIVED:
1518 case PSEUDO_TCP_ESTABLISHED:
1519 case PSEUDO_TCP_CLOSE_WAIT:
1521 case PSEUDO_TCP_CLOSED:
1522 case PSEUDO_TCP_FIN_WAIT_1:
1523 case PSEUDO_TCP_FIN_WAIT_2:
1524 case PSEUDO_TCP_CLOSING:
1525 case PSEUDO_TCP_TIME_WAIT:
1526 case PSEUDO_TCP_LAST_ACK:
1533 /* True iff the @state requires that a FIN has already been received from the
// Classifies |state| by whether a FIN must already have been received from
// the peer. The states are listed in two groups: LISTEN, SYN_SENT,
// SYN_RECEIVED, ESTABLISHED, FIN_WAIT_1 and FIN_WAIT_2 first, then CLOSED,
// CLOSING, TIME_WAIT, CLOSE_WAIT and LAST_ACK.
// NOTE(review): the switch header and the return statements are not visible
// in this listing; presumably the first group yields FALSE and the second
// TRUE -- confirm against the full file.
1536 pseudo_tcp_state_has_received_fin (PseudoTcpState state)
1539 case PSEUDO_TCP_LISTEN:
1540 case PSEUDO_TCP_SYN_SENT:
1541 case PSEUDO_TCP_SYN_RECEIVED:
1542 case PSEUDO_TCP_ESTABLISHED:
1543 case PSEUDO_TCP_FIN_WAIT_1:
1544 case PSEUDO_TCP_FIN_WAIT_2:
1546 case PSEUDO_TCP_CLOSED:
1547 case PSEUDO_TCP_CLOSING:
1548 case PSEUDO_TCP_TIME_WAIT:
1549 case PSEUDO_TCP_CLOSE_WAIT:
1550 case PSEUDO_TCP_LAST_ACK:
1557 /* True iff the @state requires that a FIN-ACK has already been received from
// Classifies |state| by whether a FIN-ACK must already have been received
// from the peer. The states are listed in two groups: every state except
// CLOSED and TIME_WAIT first, then CLOSED and TIME_WAIT.
// NOTE(review): the switch header and the return statements are not visible
// in this listing; presumably the first group yields FALSE and the second
// TRUE -- confirm against the full file.
1560 pseudo_tcp_state_has_received_fin_ack (PseudoTcpState state)
1563 case PSEUDO_TCP_LISTEN:
1564 case PSEUDO_TCP_SYN_SENT:
1565 case PSEUDO_TCP_SYN_RECEIVED:
1566 case PSEUDO_TCP_ESTABLISHED:
1567 case PSEUDO_TCP_FIN_WAIT_1:
1568 case PSEUDO_TCP_FIN_WAIT_2:
1569 case PSEUDO_TCP_CLOSING:
1570 case PSEUDO_TCP_CLOSE_WAIT:
1571 case PSEUDO_TCP_LAST_ACK:
1573 case PSEUDO_TCP_CLOSED:
1574 case PSEUDO_TCP_TIME_WAIT:
// Handles one parsed incoming segment |seg| for socket |self|.
//
// Outline of the visible logic, in order:
//  1. A segment for a different conversation is logged (the RST reply is
//     commented out).
//  2. The receive timestamps (last_traffic, lastrecv) are refreshed.
//  3. A segment arriving while CLOSED, or carrying data in a state that
//     implies a FIN-ACK was received, triggers a local closedown() unless
//     the segment is itself an RST.
//  4. An RST segment closes the socket down with ECONNRESET.
//  5. Control segments: CTL_CONNECT parses the peer options and advances the
//     handshake (LISTEN to SYN_RECEIVED, SYN_SENT to established).
//  6. ts_recent is updated when the segment covers ts_lastack.
//  7. ACK processing: RTT and RTO estimation, send window update, release of
//     acknowledged data and segments, NewReno fast retransmit and recovery,
//     slow start and congestion avoidance. Duplicate ACKs are counted.
//  8. With FIN-ACK support, the close state machine advances on a received
//     FIN or on the ACK of a locally sent FIN.
//  9. The PseudoTcpWritable callback fires once the send buffer has drained
//     below the refill threshold.
// 10. The kind of ACK to send back (sflags) is chosen.
// 11. The segment is trimmed to the receive buffer, written into rbuf, and
//     out-of-order ranges are tracked in priv->rlist.
// 12. attempt_send() runs and the PseudoTcpReadable callback fires for new
//     data.
//
// NOTE(review): many short lines are not visible in this listing: braces,
// returns, else branches, several declarations, and the assignments of
// is_fin_ack and bNewData. The exact branch structure and the return values
// must be confirmed against the full file.
1582 process(PseudoTcpSocket *self, Segment *seg)
1584 PseudoTcpSocketPrivate *priv = self->priv;
1586 SendFlags sflags = sfNone;
1587 gboolean bIgnoreData;
1589 gboolean bConnect = FALSE;
1591 gsize available_space;
1592 guint32 kIdealRefillSize;
1593 gboolean is_valuable_ack, is_duplicate_ack, is_fin_ack = FALSE;
1594 gboolean received_fin = FALSE;
1596 /* If this is the wrong conversation, send a reset!?!
1597 (with the correct conversation?) */
1598 if (seg->conv != priv->conv) {
1599 //if ((seg->flags & FLAG_RST) == 0) {
1600 // packet(sock, tcb, seg->ack, 0, FLAG_RST, 0, 0);
1602 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "wrong conversation");
1606 now = get_current_time (self);
1607 priv->last_traffic = priv->lastrecv = now;
1608 priv->bOutgoing = FALSE;
1610 if (priv->state == PSEUDO_TCP_CLOSED ||
1611 (pseudo_tcp_state_has_received_fin_ack (priv->state) && seg->len > 0)) {
1612 /* Send an RST segment. See: RFC 1122, §4.2.2.13; RFC 793, §3.4, point 3,
1613 * page 37. We can only send RST if we know the peer knows we’re closed;
1614 * otherwise this could be a timeout retransmit from them, due to our
1615 * packets from data through to FIN being dropped. */
1616 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1617 "Segment received while closed; sending RST.");
// Never answer an RST with another RST.
1618 if ((seg->flags & FLAG_RST) == 0) {
1619 closedown (self, 0, CLOSEDOWN_LOCAL);
1625 // Check if this is a reset segment
1626 if (seg->flags & FLAG_RST) {
1627 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Received RST segment; closing down.");
1628 closedown (self, ECONNRESET, CLOSEDOWN_REMOTE);
1632 // Check for control data
1634 if (seg->flags & FLAG_CTL) {
1635 if (seg->len == 0) {
1636 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Missing control code");
1638 } else if (seg->data[0] == CTL_CONNECT) {
// Everything after the control code byte is the option list.
1641 parse_options (self, (guint8 *) &seg->data[1], seg->len - 1);
1643 if (priv->state == PSEUDO_TCP_LISTEN) {
1644 set_state (self, PSEUDO_TCP_SYN_RECEIVED);
1645 queue_connect_message (self);
1646 } else if (priv->state == PSEUDO_TCP_SYN_SENT) {
1647 set_state_established (self);
1650 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Unknown control code: %u", seg->data[0]);
// Update the timestamp to echo when this segment covers the last
// acknowledgement number that was sent.
1656 if (SMALLER_OR_EQUAL (seg->seq, priv->ts_lastack) &&
1657 SMALLER (priv->ts_lastack, seg->seq + seg->len)) {
1658 priv->ts_recent = seg->tsval;
1661 // Check if this is a valuable ack
1662 is_valuable_ack = (LARGER(seg->ack, priv->snd_una) &&
1663 SMALLER_OR_EQUAL(seg->ack, priv->snd_nxt));
1664 is_duplicate_ack = (seg->ack == priv->snd_una);
1666 if (is_valuable_ack) {
1670 // Calculate round-trip time
1672 long rtt = time_diff(now, seg->tsecr);
1674 if (priv->rx_srtt == 0) {
1675 priv->rx_srtt = rtt;
1676 priv->rx_rttvar = rtt / 2;
1678 priv->rx_rttvar = (3 * priv->rx_rttvar +
1679 labs((long)(rtt - priv->rx_srtt))) / 4;
1680 priv->rx_srtt = (7 * priv->rx_srtt + rtt) / 8;
1682 priv->rx_rto = bound(MIN_RTO,
1683 priv->rx_srtt + max(1LU, 4 * priv->rx_rttvar), MAX_RTO);
1685 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "rtt: %ld srtt: %u rttvar: %u rto: %u",
1686 rtt, priv->rx_srtt, priv->rx_rttvar, priv->rx_rto);
1688 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid RTT: %ld", rtt);
1692 priv->last_acked_ts = seg->tsecr;
1695 priv->snd_wnd = seg->wnd << priv->swnd_scale;
1697 nAcked = seg->ack - priv->snd_una;
1698 priv->snd_una = seg->ack;
// The retransmission timer is stopped once nothing is left in flight,
// otherwise it is restarted from now.
1700 priv->rto_base = (priv->snd_una == priv->snd_nxt) ? 0 : now;
1702 /* ACKs for FIN segments give an increment on nAcked, but there is no
1703 * corresponding byte to read because the FIN segment is empty (it just has
1704 * a sequence number). */
1705 if (nAcked == priv->sbuf.data_length + 1 &&
1706 pseudo_tcp_state_has_sent_fin (priv->state)) {
1711 pseudo_tcp_fifo_consume_read_data (&priv->sbuf, nAcked);
// Release the send segments that the acknowledgement covers.
1713 for (nFree = nAcked; nFree > 0; ) {
1716 g_assert_cmpuint (g_queue_get_length (&priv->slist), !=, 0);
1717 data = (SSegment *) g_queue_peek_head (&priv->slist);
1719 if (nFree < data->len) {
1724 if (data->len > priv->largest) {
1725 priv->largest = data->len;
1728 g_slice_free (SSegment, data);
1729 g_queue_pop_head (&priv->slist);
1733 if (priv->dup_acks >= 3) {
1734 if (LARGER_OR_EQUAL (priv->snd_una, priv->recover)) { // NewReno
1735 guint32 nInFlight = priv->snd_nxt - priv->snd_una;
1736 // (Fast Retransmit)
1737 priv->cwnd = min(priv->ssthresh,
1738 max (nInFlight, priv->mss) + priv->mss);
1739 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "exit recovery cwnd=%d ssthresh=%d nInFlight=%d mss: %d", priv->cwnd, priv->ssthresh, nInFlight, priv->mss);
1740 priv->fast_recovery = FALSE;
1743 int transmit_status;
1745 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1746 transmit_status = transmit(self, g_queue_peek_head (&priv->slist), now);
1747 if (transmit_status != 0) {
1748 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1749 "Error transmitting recovery retransmit segment. Closing down.");
1750 closedown (self, transmit_status, CLOSEDOWN_LOCAL);
1753 priv->cwnd += (nAcked > priv->mss ? priv->mss : 0) -
1754 min(nAcked, priv->cwnd);
1758 // Slow start, congestion avoidance
1759 if (priv->cwnd < priv->ssthresh) {
1760 priv->cwnd += priv->mss;
1762 priv->cwnd += max(1LU, priv->mss * priv->mss / priv->cwnd);
1765 } else if (is_duplicate_ack) {
1766 /* !?! Note, tcp says don't do this... but otherwise how does a
1767 closed window become open? */
1768 priv->snd_wnd = seg->wnd << priv->swnd_scale;
1770 // Check duplicate acks
1772 // it's a dup ack, but with a data payload, so don't modify priv->dup_acks
1773 } else if (priv->snd_una != priv->snd_nxt) {
1776 priv->dup_acks += 1;
1777 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "Received dup ack (dups: %u)",
1779 if (priv->dup_acks == 3) { // (Fast Retransmit)
1780 int transmit_status;
1783 if (LARGER_OR_EQUAL (priv->snd_una, priv->recover) ||
1784 seg->tsecr == priv->last_acked_ts) { /* NewReno */
1785 /* Invoke fast retransmit RFC3782 section 3 step 1A*/
1786 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "enter recovery");
1787 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "recovery retransmit");
1789 transmit_status = transmit(self, g_queue_peek_head (&priv->slist),
1791 if (transmit_status != 0) {
1792 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1793 "Error transmitting recovery retransmit segment. Closing down.");
1795 closedown (self, transmit_status, CLOSEDOWN_LOCAL);
1798 priv->recover = priv->snd_nxt;
1799 nInFlight = priv->snd_nxt - priv->snd_una;
1800 priv->ssthresh = max(nInFlight / 2, 2 * priv->mss);
1801 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1802 "ssthresh: %u = max((nInFlight: %u / 2), 2 * mss: %u)",
1803 priv->ssthresh, nInFlight, priv->mss);
1804 priv->cwnd = priv->ssthresh + 3 * priv->mss;
1805 priv->fast_recovery = TRUE;
1807 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE,
1808 "Skipping fast recovery: recover: %u snd_una: %u", priv->recover,
1811 } else if (priv->dup_acks > 3) {
1812 if (priv->fast_recovery)
1813 priv->cwnd += priv->mss;
// A segment other than the connect message completes the handshake on the
// side that was in SYN_RECEIVED.
1821 if ((priv->state == PSEUDO_TCP_SYN_RECEIVED) && !bConnect) {
1822 set_state_established (self);
1825 /* Check for connection closure. Only pay attention to FIN segments if they
1826 * are in sequence; otherwise we’ve missed a packet earlier in the stream and
1827 * need to request retransmission first. */
1828 if (priv->support_fin_ack) {
1829 /* @received_fin is set when, and only when, all segments preceding the FIN
1830 * have been acknowledged. This is to handle the case where the FIN arrives
1831 * out of order with a preceding data segment. */
1832 if (seg->flags & FLAG_FIN) {
1833 priv->rcv_fin = seg->seq;
1834 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Setting rcv_fin = %u", priv->rcv_fin);
1837 /* For the moment, FIN segments must not contain data. */
1838 if (seg->flags & FLAG_FIN && seg->len != 0) {
1839 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "FIN segment contained data; ignored");
1843 received_fin = (priv->rcv_nxt != 0 && priv->rcv_nxt + seg->len == priv->rcv_fin);
1845 /* Update the state machine, implementing all transitions on ‘rcv FIN’ or
1846 * ‘rcv ACK of FIN’ from RFC 793, Figure 6; and RFC 1122, §4.2.2.8. */
1847 switch (priv->state) {
1848 case PSEUDO_TCP_ESTABLISHED:
1850 /* Received a FIN from the network, RFC 793, §3.5, Case 2.
1851 * The code below will send an ACK for the FIN. */
1852 set_state (self, PSEUDO_TCP_CLOSE_WAIT);
1855 case PSEUDO_TCP_CLOSING:
1857 /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 3. */
1858 set_state (self, PSEUDO_TCP_TIME_WAIT);
1861 case PSEUDO_TCP_LAST_ACK:
1863 /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 2. */
1864 set_state_closed (self, 0);
1867 case PSEUDO_TCP_FIN_WAIT_1:
1868 if (is_fin_ack && received_fin) {
1869 /* Simultaneous close with an ACK for a FIN previously sent,
1870 * RFC 793, §3.5, Case 3. */
1871 set_state (self, PSEUDO_TCP_TIME_WAIT);
1872 } else if (is_fin_ack) {
1873 /* Handle the ACK of a locally-sent FIN flag. RFC 793, §3.5, Case 1. */
1874 set_state (self, PSEUDO_TCP_FIN_WAIT_2);
1875 } else if (received_fin) {
1876 /* Simultaneous close, RFC 793, §3.5, Case 3. */
1877 set_state (self, PSEUDO_TCP_CLOSING);
1880 case PSEUDO_TCP_FIN_WAIT_2:
1882 /* Local user closed the connection, RFC 793, §3.5, Case 1. */
1883 set_state (self, PSEUDO_TCP_TIME_WAIT);
1886 case PSEUDO_TCP_LISTEN:
1887 case PSEUDO_TCP_SYN_SENT:
1888 case PSEUDO_TCP_SYN_RECEIVED:
1889 case PSEUDO_TCP_TIME_WAIT:
1890 case PSEUDO_TCP_CLOSED:
1891 case PSEUDO_TCP_CLOSE_WAIT:
1892 /* Shouldn’t ever hit these cases. */
1894 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1895 "Unexpected state %u when FIN received", priv->state);
1896 } else if (is_fin_ack) {
1897 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1898 "Unexpected state %u when FIN-ACK received", priv->state);
1902 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid state %u when FIN received",
1906 } else if (seg->flags & FLAG_FIN) {
1907 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1908 "Invalid FIN received when FIN-ACK support is disabled");
1909 } else if (is_fin_ack) {
1910 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
1911 "Invalid FIN-ACK received when FIN-ACK support is disabled");
1914 // If we make room in the send queue, notify the user
1915 // The goal is to make sure we always have at least enough data to fill the
1916 // window. We'd like to notify the app when we are halfway to that point.
1917 kIdealRefillSize = (priv->sbuf_len + priv->rbuf_len) / 2;
1919 snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
1920 if (priv->bWriteEnable && snd_buffered < kIdealRefillSize) {
1921 priv->bWriteEnable = FALSE;
1922 if (priv->callbacks.PseudoTcpWritable)
1923 priv->callbacks.PseudoTcpWritable(self, priv->callbacks.user_data);
1926 /* Conditions where acks must be sent:
1927 * 1) Segment is too old (they missed an ACK) (immediately)
1928 * 2) Segment is too new (we missed a segment) (immediately)
1929 * 3) Segment has data (so we need to ACK!) (delayed)
1930 * ... so the only time we don't need to ACK, is an empty segment
1931 * that points to rcv_nxt!
1932 * 4) Segment has the FIN flag set (immediately) — note that the FIN flag
1933 * itself has to be included in the ACK as a numbered byte;
1934 * see RFC 793, §3.3. Also see: RFC 793, §3.5.
1936 if (seg->seq != priv->rcv_nxt) {
1937 sflags = sfDuplicateAck; // (Fast Recovery)
1938 } else if (seg->len != 0) {
1939 if (priv->ack_delay == 0) {
1940 sflags = sfImmediateAck;
1942 sflags = sfDelayedAck;
1944 } else if (received_fin) {
1945 /* FIN flags have a sequence number. Only acknowledge them after all
1946 * preceding octets have been acknowledged. */
1947 sflags = sfImmediateAck;
1950 if (sflags == sfDuplicateAck) {
1951 if (seg->seq > priv->rcv_nxt) {
1952 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too new");
1953 } else if (SMALLER_OR_EQUAL(seg->seq + seg->len, priv->rcv_nxt)) {
1954 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too old");
1958 // Adjust the incoming segment to fit our receive buffer
1959 if (SMALLER(seg->seq, priv->rcv_nxt)) {
1960 guint32 nAdjust = priv->rcv_nxt - seg->seq;
1961 if (nAdjust < seg->len) {
1962 seg->seq += nAdjust;
1963 seg->data += nAdjust;
1964 seg->len -= nAdjust;
1970 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
1972 if ((seg->seq + seg->len - priv->rcv_nxt) > available_space) {
1973 guint32 nAdjust = seg->seq + seg->len - priv->rcv_nxt - available_space;
1974 if (nAdjust < seg->len) {
1975 seg->len -= nAdjust;
// Control segments never carry user data; without FIN-ACK support, data is
// also ignored once a shutdown has been requested.
1981 bIgnoreData = (seg->flags & FLAG_CTL);
1982 if (!priv->support_fin_ack)
1983 bIgnoreData |= (priv->shutdown != SD_NONE);
1989 if (seg->seq == priv->rcv_nxt) {
1990 priv->rcv_nxt += seg->len;
1993 guint32 nOffset = seg->seq - priv->rcv_nxt;
1996 res = pseudo_tcp_fifo_write_offset (&priv->rbuf, (guint8 *) seg->data,
1998 g_assert_cmpint (res, ==, seg->len);
2000 if (seg->seq == priv->rcv_nxt) {
2003 pseudo_tcp_fifo_consume_write_buffer (&priv->rbuf, seg->len);
2004 priv->rcv_nxt += seg->len;
2005 priv->rcv_wnd -= seg->len;
2010 SMALLER_OR_EQUAL(((RSegment *)iter->data)->seq, priv->rcv_nxt)) {
2011 RSegment *data = (RSegment *)(iter->data);
2012 if (LARGER (data->seq + data->len, priv->rcv_nxt)) {
2013 guint32 nAdjust = (data->seq + data->len) - priv->rcv_nxt;
2014 sflags = sfImmediateAck; // (Fast Recovery)
2015 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Recovered %u bytes (%u -> %u)",
2016 nAdjust, priv->rcv_nxt, priv->rcv_nxt + nAdjust);
2017 pseudo_tcp_fifo_consume_write_buffer (&priv->rbuf, nAdjust);
2018 priv->rcv_nxt += nAdjust;
2019 priv->rcv_wnd -= nAdjust;
2021 g_slice_free (RSegment, priv->rlist->data);
2022 priv->rlist = g_list_delete_link (priv->rlist, priv->rlist);
2027 RSegment *rseg = g_slice_new0 (RSegment);
2029 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Saving %u bytes (%u -> %u)",
2030 seg->len, seg->seq, seg->seq + seg->len);
2031 rseg->seq = seg->seq;
2032 rseg->len = seg->len;
// Keep priv->rlist ordered by sequence number.
2034 while (iter && SMALLER (((RSegment*)iter->data)->seq, rseg->seq)) {
2035 iter = g_list_next (iter);
2037 priv->rlist = g_list_insert_before(priv->rlist, iter, rseg);
2043 /* FIN flags have a sequence number. */
2048 attempt_send(self, sflags);
2050 // If we have new data, notify the user
2051 if (bNewData && priv->bReadEnable) {
2052 /* priv->bReadEnable = FALSE; — removed so that we’re always notified of
2053 * incoming pseudo-TCP data, rather than having to read the entire buffer
2054 * on each readable() callback before the next callback is enabled.
2055 * (When client-provided buffers are small, this is not possible.) */
2056 if (priv->callbacks.PseudoTcpReadable)
2057 priv->callbacks.PseudoTcpReadable(self, priv->callbacks.user_data);
// Sends, or resends, |segment|, at most priv->mss bytes at a time. |now| is
// the current time passed through to packet().
//
// Visible behaviour:
//  - gives up with "too many retransmits" once segment->xmit reaches 15 in
//    the ESTABLISHED state, or 30 in any other state;
//  - WR_FAIL from packet() yields ECONNABORTED;
//  - WR_TOO_LARGE moves to the next PACKET_MAXIMUMS level, recomputes mss
//    from it and sets cwnd to 2 * mss; a zero entry at the next level means
//    no further level exists ("MTU too small");
//  - if only part of the segment was sent, the remainder becomes a new
//    SSegment inserted after it in slist, and in unsent_slist when it was
//    never transmitted;
//  - a first transmission pops the segment from unsent_slist and advances
//    snd_nxt by its length;
//  - rto_base is set to |now| if it was zero.
//
// Callers treat a non-zero return as an error code to pass to closedown().
// NOTE(review): the retry loop, the xmit increment, the FIN handling after
// the last visible if, and most return statements are not visible in this
// listing -- confirm against the full file.
2064 transmit(PseudoTcpSocket *self, SSegment *segment, guint32 now)
2066 PseudoTcpSocketPrivate *priv = self->priv;
2067 guint32 nTransmit = min(segment->len, priv->mss);
2069 if (segment->xmit >= ((priv->state == PSEUDO_TCP_ESTABLISHED) ? 15 : 30)) {
2070 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "too many retransmits");
2075 guint32 seq = segment->seq;
2076 guint8 flags = segment->flags;
2077 PseudoTcpWriteResult wres;
2079 /* The packet must not have already been acknowledged. */
2080 g_assert_cmpuint (segment->seq - priv->snd_una, <=, 1024 * 1024 * 64);
2082 /* Write out the packet. */
2083 wres = packet(self, seq, flags,
2084 segment->seq - priv->snd_una, nTransmit, now);
2086 if (wres == WR_SUCCESS)
2089 if (wres == WR_FAIL) {
2090 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "packet failed");
2091 return ECONNABORTED; /* FIXME: This error code doesn’t quite seem right */
2094 g_assert_cmpint (wres, ==, WR_TOO_LARGE);
2097 if (PACKET_MAXIMUMS[priv->msslevel + 1] == 0) {
2098 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "MTU too small");
2101 /* !?! We need to break up all outstanding and pending packets
2102 and then retransmit!?! */
2104 priv->mss = PACKET_MAXIMUMS[++priv->msslevel] - PACKET_OVERHEAD;
2105 // I added this... haven't researched actual formula
2106 priv->cwnd = 2 * priv->mss;
2108 if (priv->mss < nTransmit) {
2109 nTransmit = priv->mss;
2113 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %u bytes ", priv->mss);
// Split off whatever did not fit into this transmission.
2116 if (nTransmit < segment->len) {
2117 SSegment *subseg = g_slice_new0 (SSegment);
2118 subseg->seq = segment->seq + nTransmit;
2119 subseg->len = segment->len - nTransmit;
2120 subseg->flags = segment->flags;
2121 subseg->xmit = segment->xmit;
2123 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "mss reduced to %u", priv->mss);
2125 segment->len = nTransmit;
2126 g_queue_insert_after (&priv->slist,
2127 g_queue_find (&priv->slist, segment), subseg);
2128 if (subseg->xmit == 0)
2129 g_queue_insert_after (&priv->unsent_slist,
2130 g_queue_find (&priv->unsent_slist, segment), subseg);
// First transmission: the segment leaves the unsent queue.
2133 if (segment->xmit == 0) {
2134 g_assert (g_queue_peek_head (&priv->unsent_slist) == segment);
2135 g_queue_pop_head (&priv->unsent_slist);
2136 priv->snd_nxt += segment->len;
2138 /* FIN flags require acknowledgement. */
2139 if (segment->len == 0 && segment->flags & FLAG_FIN)
2144 if (priv->rto_base == 0) {
2145 priv->rto_base = now;
// Sends as much queued data as the windows allow, or an ACK when there is
// nothing to send. |sflags| tells which kind of send or ACK the caller
// wants.
//
// Visible behaviour:
//  - after an idle period longer than rx_rto, cwnd is reset to one mss;
//  - the window is min (snd_wnd, cwnd), with cwnd widened by dup_acks * mss
//    on the first and second duplicate ACK (limited transmit);
//  - the usable amount is that window minus the bytes in flight, capped at
//    one mss of buffered data; there is a silly-window-syndrome check
//    (RFC 813) when less than a quarter of the window is usable;
//  - sfDuplicateAck sends a bare ACK packet;
//  - with nothing available, and unless sending FIN or RST, an ACK packet is
//    sent for immediate or duplicate ACK requests (the rest of that
//    condition is not visible);
//  - with nagling enabled, a partial segment is held back while data is in
//    flight;
//  - a segment larger than the available amount is split in two before
//    transmit() is called;
//  - a transmit() failure closes the socket via closedown() as REMOTE.
//
// NOTE(review): the enclosing loop, several declarations, the returns and
// the t_ack handling are not visible in this listing -- confirm against the
// full file.
2152 attempt_send(PseudoTcpSocket *self, SendFlags sflags)
2154 PseudoTcpSocketPrivate *priv = self->priv;
2155 guint32 now = get_current_time (self);
2156 gboolean bFirst = TRUE;
2158 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Attempting send with flags %u.", sflags);
2160 if (time_diff(now, priv->lastsend) > (long) priv->rx_rto) {
2161 priv->cwnd = priv->mss;
2174 int transmit_status;
2177 if ((priv->dup_acks == 1) || (priv->dup_acks == 2)) { // Limited Transmit
2178 cwnd += priv->dup_acks * priv->mss;
2180 nWindow = min(priv->snd_wnd, cwnd);
2181 nInFlight = priv->snd_nxt - priv->snd_una;
2182 nUseable = (nInFlight < nWindow) ? (nWindow - nInFlight) : 0;
2183 snd_buffered = pseudo_tcp_fifo_get_buffered (&priv->sbuf);
2184 if (snd_buffered < nInFlight) /* iff a FIN has been sent */
2187 nAvailable = min(snd_buffered - nInFlight, priv->mss);
2189 if (nAvailable > nUseable) {
2190 if (nUseable * 4 < nWindow) {
2191 // RFC 813 - avoid SWS
2194 nAvailable = nUseable;
2199 gsize available_space = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
2202 DEBUG (PSEUDO_TCP_DEBUG_VERBOSE, "[cwnd: %u nWindow: %u nInFlight: %u "
2203 "nAvailable: %u nQueued: %" G_GSIZE_FORMAT " nEmpty: %" G_GSIZE_FORMAT
2204 " nWaiting: %zu ssthresh: %u]",
2205 priv->cwnd, nWindow, nInFlight, nAvailable, snd_buffered,
2206 available_space, snd_buffered - nInFlight, priv->ssthresh);
2209 if (sflags == sfDuplicateAck) {
2210 packet(self, priv->snd_nxt, 0, 0, 0, now);
2215 if (nAvailable == 0 && sflags != sfFin && sflags != sfRst) {
2216 if (sflags == sfNone)
2219 // If this is an immediate ack, or the second delayed ack
2220 if ((sflags == sfImmediateAck || sflags == sfDuplicateAck) ||
2222 packet(self, priv->snd_nxt, 0, 0, 0, now);
2230 // If there is data already in-flight, and we haven't a full segment of
2231 // data ready to send then hold off until we get more to send, or the
2232 // in-flight data is acknowledged.
2233 if (priv->use_nagling && sflags != sfFin && sflags != sfRst &&
2234 (priv->snd_nxt > priv->snd_una) &&
2235 (nAvailable < priv->mss)) {
2239 // Find the next segment to transmit
2240 iter = g_queue_peek_head_link (&priv->unsent_slist);
2245 // If the segment is too large, break it into two
2246 if (sseg->len > nAvailable && sflags != sfFin && sflags != sfRst) {
2247 SSegment *subseg = g_slice_new0 (SSegment);
2248 subseg->seq = sseg->seq + nAvailable;
2249 subseg->len = sseg->len - nAvailable;
2250 subseg->flags = sseg->flags;
2252 sseg->len = nAvailable;
2253 g_queue_insert_after (&priv->unsent_slist, iter, subseg);
2254 g_queue_insert_after (&priv->slist, g_queue_find (&priv->slist, sseg),
2258 transmit_status = transmit(self, sseg, now);
2259 if (transmit_status != 0) {
2260 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "transmit failed");
2262 // TODO: Is this the right thing ?
2263 closedown (self, transmit_status, CLOSEDOWN_REMOTE);
2267 if (sflags == sfImmediateAck || sflags == sfDelayedAck)
2272 /* If @source is %CLOSEDOWN_REMOTE, don’t send an RST packet, since closedown()
2273 * has been called as a result of an RST segment being received.
2274 * See: RFC 1122, §4.2.2.13. */
// Forces the socket into the CLOSED state, recording |err| through
// set_state_closed().
//
// For a local closedown with FIN-ACK support, an RST message is queued and
// sent first. For a local closedown without FIN-ACK support, priv->shutdown
// is set to SD_FORCEFUL instead. A remote closedown does neither.
//
// The switch then steps the state towards TIME-WAIT or LAST-ACK through
// set_state() before the final set_state_closed() call.
// NOTE(review): the break statements are not visible in this listing, so
// the cases presumably fall through from one step to the next -- confirm
// against the full file.
2276 closedown (PseudoTcpSocket *self, guint32 err, ClosedownSource source)
2278 PseudoTcpSocketPrivate *priv = self->priv;
2280 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Closing down socket %p with %s error %u.",
2281 self, (source == CLOSEDOWN_LOCAL) ? "local" : "remote", err);
2283 if (source == CLOSEDOWN_LOCAL && priv->support_fin_ack) {
2284 queue_rst_message (self);
2285 attempt_send (self, sfRst);
2286 } else if (source == CLOSEDOWN_LOCAL) {
2287 priv->shutdown = SD_FORCEFUL;
2290 /* ‘Cute’ little navigation through the state machine to avoid breaking the
2291 * invariant that CLOSED can only be reached from TIME-WAIT or LAST-ACK. */
2292 switch (priv->state) {
2293 case PSEUDO_TCP_LISTEN:
2294 case PSEUDO_TCP_SYN_SENT:
2296 case PSEUDO_TCP_SYN_RECEIVED:
2297 case PSEUDO_TCP_ESTABLISHED:
2298 set_state (self, PSEUDO_TCP_FIN_WAIT_1);
2300 case PSEUDO_TCP_FIN_WAIT_1:
2301 set_state (self, PSEUDO_TCP_FIN_WAIT_2);
2303 case PSEUDO_TCP_FIN_WAIT_2:
2304 case PSEUDO_TCP_CLOSING:
2305 set_state (self, PSEUDO_TCP_TIME_WAIT);
2307 case PSEUDO_TCP_CLOSE_WAIT:
2308 set_state (self, PSEUDO_TCP_LAST_ACK);
2310 case PSEUDO_TCP_LAST_ACK:
2311 case PSEUDO_TCP_TIME_WAIT:
2312 case PSEUDO_TCP_CLOSED:
2317 set_state_closed (self, err);
// Recomputes the segment size limits from priv->mtu_advise.
//
// The loop walks PACKET_MAXIMUMS from level 0 until the next entry is zero,
// testing each entry against mtu_advise to pick priv->msslevel. mss is then
// mtu_advise minus PACKET_OVERHEAD, ssthresh is raised to at least 2 * mss
// and cwnd to at least mss.
// NOTE(review): the loop increment and the break inside the if are not
// visible in this listing -- confirm against the full file.
2321 adjustMTU(PseudoTcpSocket *self)
2323 PseudoTcpSocketPrivate *priv = self->priv;
2325 // Determine our current mss level, so that we can adjust appropriately later
2326 for (priv->msslevel = 0;
2327 PACKET_MAXIMUMS[priv->msslevel + 1] > 0;
2329 if (((guint16)PACKET_MAXIMUMS[priv->msslevel]) <= priv->mtu_advise) {
2333 priv->mss = priv->mtu_advise - PACKET_OVERHEAD;
2334 // !?! Should we reset priv->largest here?
2335 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Adjusting mss to %u bytes", priv->mss);
2336 // Enforce minimums on ssthresh and cwnd
2337 priv->ssthresh = max(priv->ssthresh, 2 * priv->mss);
2338 priv->cwnd = max(priv->cwnd, priv->mss);
// Stores |scale_factor| as priv->swnd_scale, the shift that process()
// applies to the window field of received segments, and logs it.
2342 apply_window_scale_option (PseudoTcpSocket *self, guint8 scale_factor)
2344 PseudoTcpSocketPrivate *priv = self->priv;
2346 priv->swnd_scale = scale_factor;
2347 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Setting scale factor to %u", scale_factor);
// Enables FIN-ACK handling for this socket by setting
// priv->support_fin_ack.
2351 apply_fin_ack_option (PseudoTcpSocket *self)
2353 PseudoTcpSocketPrivate *priv = self->priv;
2355 priv->support_fin_ack = TRUE;
// Applies one TCP-style option of type |kind| whose content starts at
// |data|.
//
// Visible handling:
//  - an MSS option is logged as unsupported;
//  - TCP_OPT_WND_SCALE passes data[0] to apply_window_scale_option(), with
//    an "Invalid window scale option" log on a failed check;
//  - TCP_OPT_FIN_ACK calls apply_fin_ack_option();
//  - an unrecognised kind is logged as "Invalid TCP option".
// NOTE(review): the last parameter, the switch header, the remaining case
// labels and the validity check for the window scale option are not visible
// in this listing -- confirm against the full file.
2359 apply_option (PseudoTcpSocket *self, guint8 kind, const guint8 *data,
2364 DEBUG (PSEUDO_TCP_DEBUG_NORMAL,
2365 "Peer specified MSS option which is not supported.");
2368 case TCP_OPT_WND_SCALE:
2369 // Window scale factor.
2370 // http://www.ietf.org/rfc/rfc1323.txt
2372 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid window scale option received.");
2375 apply_window_scale_option(self, data[0]);
2377 case TCP_OPT_FIN_ACK:
2379 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "FIN-ACK support enabled.");
2380 apply_fin_ack_option (self);
2384 /* Nothing to do. */
2387 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid TCP option %u", kind);
/* Parse the TCP option list held in the @len bytes at @data and apply each
 * option through apply_option().
 *
 * While parsing, the function records whether a window-scale option and a
 * FIN-ACK option were seen. Once parsing is done:
 *  - if no window-scale option was seen and a receive scale is in use
 *    (priv->rwnd_scale > 0), the receive buffer is resized back to
 *    DEFAULT_RCV_BUF_SIZE and priv->swnd_scale is cleared;
 *  - if no FIN-ACK option was seen, priv->support_fin_ack is cleared.
 */
2394 parse_options (PseudoTcpSocket *self, const guint8 *data, guint32 len)
2396 PseudoTcpSocketPrivate *priv = self->priv;
2397 gboolean has_window_scaling_option = FALSE;
2398 gboolean has_fin_ack_option = FALSE;
2401 // See http://www.freesoft.org/CIE/Course/Section4/8.htm for
2402 // parsing the options list.
2404 guint8 kind = TCP_OPT_EOL;
/* TCP_OPT_EOL and TCP_OPT_NOOP are recognised from the kind byte alone,
 * before any length byte is read. */
2413 if (kind == TCP_OPT_EOL) {
2414 // End of option list.
2416 } else if (kind == TCP_OPT_NOOP) {
2424 // Length of this option.
2425 opt_len = data[pos];
/* Bounds check: the declared option length must not run past the end of the
 * option data. */
2428 if (len < pos + opt_len)
2431 // Content of this option.
/* NOTE(review): this condition appears to repeat the bounds check made just
 * above; if that check bails out, the "Invalid option length" branch below
 * would be unreachable - confirm. */
2432 if (opt_len <= len - pos) {
2433 apply_option (self, kind, data + pos, opt_len);
2436 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Invalid option length received.");
/* Remember which negotiable options the peer sent, for the fallback
 * handling after parsing. */
2440 if (kind == TCP_OPT_WND_SCALE)
2441 has_window_scaling_option = TRUE;
2442 else if (kind == TCP_OPT_FIN_ACK)
2443 has_fin_ack_option = TRUE;
2446 if (!has_window_scaling_option) {
2447 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Peer doesn't support window scaling");
2448 if (priv->rwnd_scale > 0) {
2449 // Peer doesn't support TCP options and window scaling.
2450 // Revert receive buffer size to default value.
2451 resize_receive_buffer (self, DEFAULT_RCV_BUF_SIZE);
2452 priv->swnd_scale = 0;
2456 if (!has_fin_ack_option) {
2457 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "Peer doesn't support FIN-ACK");
2458 priv->support_fin_ack = FALSE;
/* Set the send buffer size to @new_size: the value is recorded in
 * priv->sbuf_len and applied as the capacity of the send FIFO (priv->sbuf).
 * The result of pseudo_tcp_fifo_set_capacity() is not checked here. */
2463 resize_send_buffer (PseudoTcpSocket *self, guint32 new_size)
2465 PseudoTcpSocketPrivate *priv = self->priv;
2467 priv->sbuf_len = new_size;
2468 pseudo_tcp_fifo_set_capacity (&priv->sbuf, new_size);
/* Resize the receive buffer to (approximately) @new_size and recompute the
 * values derived from it: the receive window scale (priv->rwnd_scale), the
 * slow-start threshold (priv->ssthresh) and the advertised receive window
 * (priv->rcv_wnd). A request for the current size (priv->rbuf_len) is
 * detected up front. */
2473 resize_receive_buffer (PseudoTcpSocket *self, guint32 new_size)
2475 PseudoTcpSocketPrivate *priv = self->priv;
2476 guint8 scale_factor = 0;
2478 gsize available_space;
2480 if (priv->rbuf_len == new_size)
2483 // Determine the scale factor such that the scaled window size can fit
2484 // in a 16-bit unsigned integer.
2485 while (new_size > 0xFFFF) {
2490 // Determine the proper size of the buffer.
/* NOTE(review): shifting back by scale_factor presumably rounds new_size down
 * to a multiple of 2^scale_factor, so the buffer size is exactly
 * representable with the chosen scale - confirm against the loop above. */
2491 new_size <<= scale_factor;
2492 result = pseudo_tcp_fifo_set_capacity (&priv->rbuf, new_size);
2494 // Make sure the new buffer is large enough to contain data in the old
2495 // buffer. This should always be true because this method is called either
2496 // before connection is established or when peers are exchanging connect
2499 priv->rbuf_len = new_size;
2500 priv->rwnd_scale = scale_factor;
/* The slow-start threshold is reset to the new buffer size. */
2501 priv->ssthresh = new_size;
/* The receive window is whatever can still be written into the resized
 * FIFO. */
2503 available_space = pseudo_tcp_fifo_get_write_remaining (&priv->rbuf);
2504 priv->rcv_wnd = available_space;
/* Return the number of bytes currently buffered in the receive FIFO
 * (priv->rbuf), as reported by pseudo_tcp_fifo_get_buffered(). */
2508 pseudo_tcp_socket_get_available_bytes (PseudoTcpSocket *self)
2510 PseudoTcpSocketPrivate *priv = self->priv;
2512 return pseudo_tcp_fifo_get_buffered (&priv->rbuf);
/* Return whether any send space is available, i.e. whether
 * pseudo_tcp_socket_get_available_send_space() reports more than zero.
 * Note that this goes through that function, including whatever side effects
 * it has on the socket. */
2516 pseudo_tcp_socket_can_send (PseudoTcpSocket *self)
2518 return (pseudo_tcp_socket_get_available_send_space (self) > 0);
/* Compute how much more data can be queued for sending. The send FIFO is
 * only consulted while the socket is in a state where no FIN has been sent
 * yet. This function can also set priv->bWriteEnable as a side effect. */
2522 pseudo_tcp_socket_get_available_send_space (PseudoTcpSocket *self)
2524 PseudoTcpSocketPrivate *priv = self->priv;
/* Once a FIN has been sent, the free space in sbuf is not queried. */
2527 if (!pseudo_tcp_state_has_sent_fin (priv->state)) {
2528 ret = pseudo_tcp_fifo_get_write_remaining (&priv->sbuf);
/* NOTE(review): presumably this flag asks for a later "writable"
 * notification when no space is available - confirm the guarding condition
 * and where bWriteEnable is consumed. */
2534 priv->bWriteEnable = TRUE;
2539 /* State names are capitalised and formatted as in RFC 793. */
/* Map @state to a constant, human-readable name. The returned string is a
 * string literal, so the caller must not free or modify it. Values without
 * a dedicated case yield "UNKNOWN". */
2540 static const gchar *
2541 pseudo_tcp_state_get_name (PseudoTcpState state)
2544 case PSEUDO_TCP_LISTEN: return "LISTEN";
2545 case PSEUDO_TCP_SYN_SENT: return "SYN-SENT";
2546 case PSEUDO_TCP_SYN_RECEIVED: return "SYN-RECEIVED";
2547 case PSEUDO_TCP_ESTABLISHED: return "ESTABLISHED";
2548 case PSEUDO_TCP_CLOSED: return "CLOSED";
2549 case PSEUDO_TCP_FIN_WAIT_1: return "FIN-WAIT-1";
2550 case PSEUDO_TCP_FIN_WAIT_2: return "FIN-WAIT-2";
2551 case PSEUDO_TCP_CLOSING: return "CLOSING";
2552 case PSEUDO_TCP_TIME_WAIT: return "TIME-WAIT";
2553 case PSEUDO_TCP_CLOSE_WAIT: return "CLOSE-WAIT";
2554 case PSEUDO_TCP_LAST_ACK: return "LAST-ACK";
2555 default: return "UNKNOWN";
/* Move the socket to @new_state.
 *
 * The transition from the current state is logged using the RFC 793 state
 * names, then checked with g_assert() against the explicit list of allowed
 * transitions below, and finally stored in priv->state. A request for the
 * state the socket is already in is detected before any of that happens.
 */
2560 set_state (PseudoTcpSocket *self, PseudoTcpState new_state)
2562 PseudoTcpSocketPrivate *priv = self->priv;
2563 PseudoTcpState old_state = priv->state;
2565 if (new_state == old_state)
2568 DEBUG (PSEUDO_TCP_DEBUG_NORMAL, "State %s → %s.",
2569 pseudo_tcp_state_get_name (old_state),
2570 pseudo_tcp_state_get_name (new_state));
2572 /* Check whether it’s a valid state transition. */
/* TRANSITION (OLD, NEW) is true when this call goes from PSEUDO_TCP_OLD to
 * PSEUDO_TCP_NEW; it reads the local old_state and the new_state parameter
 * directly. */
2573 #define TRANSITION(OLD, NEW) \
2574 (old_state == PSEUDO_TCP_##OLD && \
2575 new_state == PSEUDO_TCP_##NEW)
2577 /* Valid transitions. See: RFC 793, p23; RFC 1122, §4.2.2.8. */
/* Any transition not listed here trips the assertion. */
2578 g_assert (/* RFC 793, p23. */
2579 TRANSITION (CLOSED, SYN_SENT) ||
2580 TRANSITION (SYN_SENT, CLOSED) ||
2581 TRANSITION (CLOSED, LISTEN) ||
2582 TRANSITION (LISTEN, CLOSED) ||
2583 TRANSITION (LISTEN, SYN_SENT) ||
2584 TRANSITION (LISTEN, SYN_RECEIVED) ||
2585 TRANSITION (SYN_SENT, SYN_RECEIVED) ||
2586 TRANSITION (SYN_RECEIVED, ESTABLISHED) ||
2587 TRANSITION (SYN_SENT, ESTABLISHED) ||
2588 TRANSITION (SYN_RECEIVED, FIN_WAIT_1) ||
2589 TRANSITION (ESTABLISHED, FIN_WAIT_1) ||
2590 TRANSITION (ESTABLISHED, CLOSE_WAIT) ||
2591 TRANSITION (FIN_WAIT_1, FIN_WAIT_2) ||
2592 TRANSITION (FIN_WAIT_1, CLOSING) ||
2593 TRANSITION (CLOSE_WAIT, LAST_ACK) ||
2594 TRANSITION (FIN_WAIT_2, TIME_WAIT) ||
2595 TRANSITION (CLOSING, TIME_WAIT) ||
2596 TRANSITION (LAST_ACK, CLOSED) ||
2597 TRANSITION (TIME_WAIT, CLOSED) ||
2598 /* RFC 1122, §4.2.2.8. */
2599 TRANSITION (SYN_RECEIVED, LISTEN) ||
2600 TRANSITION (FIN_WAIT_1, TIME_WAIT));
2604 priv->state = new_state;
/* Switch the socket to PSEUDO_TCP_ESTABLISHED via set_state() and then, if
 * the user registered one, invoke the PseudoTcpOpened callback with the
 * socket and the callbacks' user_data. */
2608 set_state_established (PseudoTcpSocket *self)
2610 PseudoTcpSocketPrivate *priv = self->priv;
2612 set_state (self, PSEUDO_TCP_ESTABLISHED);
/* The callback is optional; it runs after the state has been updated. */
2615 if (priv->callbacks.PseudoTcpOpened)
2616 priv->callbacks.PseudoTcpOpened (self, priv->callbacks.user_data);
2619 /* (err == 0) means no error. */
/* Switch the socket to PSEUDO_TCP_CLOSED via set_state() and report @err to
 * the user. The PseudoTcpClosed callback is invoked only when it is set and
 * @err is non-zero, so a clean close (err == 0) triggers no callback from
 * here. */
2621 set_state_closed (PseudoTcpSocket *self, guint32 err)
2623 PseudoTcpSocketPrivate *priv = self->priv;
2625 set_state (self, PSEUDO_TCP_CLOSED);
2627 /* Only call the callback if there was an error. */
2628 if (priv->callbacks.PseudoTcpClosed && err != 0)
2629 priv->callbacks.PseudoTcpClosed (self, err, priv->callbacks.user_data);
/* Return whether the socket is fully closed, i.e. its state is exactly
 * PSEUDO_TCP_CLOSED. Intermediate closing states do not count. */
2633 pseudo_tcp_socket_is_closed (PseudoTcpSocket *self)
2635 PseudoTcpSocketPrivate *priv = self->priv;
2637 return (priv->state == PSEUDO_TCP_CLOSED);
/* Return whether the peer has closed its side of the connection, determined
 * by asking pseudo_tcp_state_has_received_fin() about the current state. */
2641 pseudo_tcp_socket_is_closed_remotely (PseudoTcpSocket *self)
2643 PseudoTcpSocketPrivate *priv = self->priv;
2645 return pseudo_tcp_state_has_received_fin (priv->state);