2 * linux/net/ipv4/inet_lro.c
4 * Large Receive Offload (ipv4 / tcp)
6 * (C) Copyright IBM Corp. 2007
9 * Jan-Bernd Themann <themann@de.ibm.com>
10 * Christoph Raisch <raisch@de.ibm.com>
13 * This program is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published by
15 * the Free Software Foundation; either version 2, or (at your option)
18 * This program is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU General Public License for more details.
23 * You should have received a copy of the GNU General Public License
24 * along with this program; if not, write to the Free Software
25 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
29 #include <linux/module.h>
30 #include <linux/if_vlan.h>
31 #include <linux/inet_lro.h>
32 #include <net/checksum.h>
/* Module metadata: license, author and a one-line description. */
34 MODULE_LICENSE("GPL");
35 MODULE_AUTHOR("Jan-Bernd Themann <themann@de.ibm.com>");
36 MODULE_DESCRIPTION("Large Receive Offload (ipv4 / tcp)");
/*
 * Header length helpers: the doff / ihl header fields shifted left by two,
 * i.e. multiplied by four.
 * NOTE(review): the macro arguments are not parenthesized in the expansion,
 * so only plain identifiers are safe to pass.
 */
38 #define TCP_HDR_LEN(tcph) (tcph->doff << 2)
39 #define IP_HDR_LEN(iph) (iph->ihl << 2)
/* IP total length (host order) minus the IP and TCP header lengths. */
40 #define TCP_PAYLOAD_LENGTH(iph, tcph) \
41 (ntohs(iph->tot_len) - IP_HDR_LEN(iph) - TCP_HDR_LEN(tcph))
/*
 * Values the ihl / doff header fields are compared against in
 * lro_tcp_ip_check(); they are in the units of those fields, not shifted.
 */
43 #define IPH_LEN_WO_OPTIONS 5
44 #define TCPH_LEN_WO_OPTIONS 5
45 #define TCPH_LEN_W_TIMESTAMP 8
/* Not referenced anywhere in the visible part of this file. */
47 #define LRO_MAX_PG_HLEN 64
/*
 * Increment the counter named attr inside lro_mgr->stats.
 * NOTE(review): expands to a bare brace block rather than do { } while (0).
 */
49 #define LRO_INC_STATS(lro_mgr, attr) { lro_mgr->stats.attr++; }
52 * Basic tcp checks whether packet is suitable for LRO
/*
 * lro_tcp_ip_check - test the IP and TCP headers of one received segment.
 *
 * iph, tcph: headers of the segment under test.
 * len:       length the IP total-length field is compared against.
 * lro_desc:  descriptor of the running aggregation; tested for NULL before
 *            its stored timestamp value is compared.
 *
 * The conditions visible here look at: IP total length vs. len, a zero TCP
 * payload, the IP header length field, the TCP flag bits, the ECN CE mark,
 * the TCP data offset, and (for a data offset of TCPH_LEN_W_TIMESTAMP) the
 * first option word and the timestamp ordering.
 *
 * NOTE(review): this listing appears to be missing lines (braces and the
 * statement guarded by each condition), so the result of each check and the
 * function's return values cannot be confirmed from here. Callers test the
 * result in an if; presumably non-zero means "not suitable" -- confirm
 * against the complete file.
 */
55 static int lro_tcp_ip_check(const struct iphdr *iph, const struct tcphdr *tcph,
56 int len, const struct net_lro_desc *lro_desc)
58 /* check ip header: don't aggregate padded frames */
59 if (ntohs(iph->tot_len) != len)
/* segment carries no TCP payload */
62 if (TCP_PAYLOAD_LENGTH(iph, tcph) == 0)
/* IP header length differs from IPH_LEN_WO_OPTIONS */
65 if (iph->ihl != IPH_LEN_WO_OPTIONS)
/* any of these flags set, or ACK not set */
68 if (tcph->cwr || tcph->ece || tcph->urg || !tcph->ack ||
69 tcph->rst || tcph->syn || tcph->fin)
72 if (INET_ECN_is_ce(ipv4_get_dsfield(iph)))
/* data offset is neither of the two supported values */
75 if (tcph->doff != TCPH_LEN_WO_OPTIONS &&
76 tcph->doff != TCPH_LEN_W_TIMESTAMP)
79 /* check tcp options (only timestamp allowed) */
80 if (tcph->doff == TCPH_LEN_W_TIMESTAMP) {
/* topt points at the first 32-bit word following the fixed TCP header */
81 __be32 *topt = (__be32 *)(tcph + 1);
83 if (*topt != htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16)
84 | (TCPOPT_TIMESTAMP << 8)
88 /* timestamp should be in right order */
90 if (lro_desc && after(ntohl(lro_desc->tcp_rcv_tsval),
94 /* timestamp reply should not be zero */
/*
 * lro_update_tcp_ip_header - write the aggregated state held in lro_desc
 * back into the IP and TCP headers the descriptor points at.
 *
 * Visible effects: the TCP ack number and window are overwritten, the third
 * 32-bit word after the TCP header is overwritten with tcp_rcv_tsecr when
 * tcp_saw_tstamp is set, the IP checksum and total length are updated to
 * ip_tot_len, and tcph->check is recomputed from data_csum plus a checksum
 * over the TCP header.
 *
 * NOTE(review): the declarations of p and tcp_hdr_csum, and possibly other
 * statements, are not present in this listing.
 */
103 static void lro_update_tcp_ip_header(struct net_lro_desc *lro_desc)
105 struct iphdr *iph = lro_desc->iph;
106 struct tcphdr *tcph = lro_desc->tcph;
110 tcph->ack_seq = lro_desc->tcp_ack;
111 tcph->window = lro_desc->tcp_window;
113 if (lro_desc->tcp_saw_tstamp) {
114 p = (__be32 *)(tcph + 1);
115 *(p+2) = lro_desc->tcp_rcv_tsecr;
/* adjust the IP checksum for the new total length, then store the length */
118 csum_replace2(&iph->check, iph->tot_len, htons(lro_desc->ip_tot_len));
119 iph->tot_len = htons(lro_desc->ip_tot_len);
/* fold the TCP header checksum into the accumulated data checksum */
122 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), 0);
123 lro_desc->data_csum = csum_add(lro_desc->data_csum, tcp_hdr_csum);
/* length passed here is ip_tot_len minus the IP header length */
124 tcph->check = csum_tcpudp_magic(iph->saddr, iph->daddr,
125 lro_desc->ip_tot_len -
126 IP_HDR_LEN(iph), IPPROTO_TCP,
127 lro_desc->data_csum);
/*
 * lro_tcp_data_csum - derive a checksum value from the checksum field of a
 * TCP segment.
 *
 * iph, tcph: headers of the segment.
 * len:       added to the TCP header length for the pseudo-header sum.
 *
 * Starts from the complemented, unfolded tcph->check, computes a partial sum
 * over the TCP header and a pseudo-header sum, and returns the result of
 * nested csum_sub() calls.
 *
 * NOTE(review): the tail of the return expression and some declarations are
 * cut from this listing; presumably tcp_ps_hdr_csum is the final subtrahend
 * and the result is the checksum of the TCP payload only -- confirm.
 */
130 static __wsum lro_tcp_data_csum(struct iphdr *iph, struct tcphdr *tcph, int len)
134 __wsum tcp_ps_hdr_csum;
136 tcp_csum = ~csum_unfold(tcph->check);
/* note: the header sum is seeded with tcp_csum, not with zero */
137 tcp_hdr_csum = csum_partial(tcph, TCP_HDR_LEN(tcph), tcp_csum);
139 tcp_ps_hdr_csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
140 len + TCP_HDR_LEN(tcph),
143 return csum_sub(csum_sub(tcp_csum, tcp_hdr_csum),
/*
 * lro_init_desc - set up lro_desc for a new aggregation whose first packet
 * is skb.
 *
 * Records skb as the parent, remembers the TCP header pointer, the expected
 * next sequence number (seq + payload length), ack, window and IP total
 * length, starts the packet count at one, captures the timestamp option
 * words when present, takes the payload length as the initial mss, marks the
 * descriptor active and stores the value returned by lro_tcp_data_csum().
 *
 * NOTE(review): some lines (local declarations and possibly further
 * assignments) are not present in this listing.
 */
147 static void lro_init_desc(struct net_lro_desc *lro_desc, struct sk_buff *skb,
148 struct iphdr *iph, struct tcphdr *tcph)
152 u32 tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
154 nr_frags = skb_shinfo(skb)->nr_frags;
155 lro_desc->parent = skb;
/* address of the first fragment slot past those currently counted */
156 lro_desc->next_frag = &(skb_shinfo(skb)->frags[nr_frags]);
158 lro_desc->tcph = tcph;
159 lro_desc->tcp_next_seq = ntohl(tcph->seq) + tcp_data_len;
160 lro_desc->tcp_ack = tcph->ack_seq;
161 lro_desc->tcp_window = tcph->window;
163 lro_desc->pkt_aggr_cnt = 1;
164 lro_desc->ip_tot_len = ntohs(iph->tot_len);
/* NOTE(review): literal 8 has the same value as TCPH_LEN_W_TIMESTAMP */
166 if (tcph->doff == 8) {
167 ptr = (__be32 *)(tcph+1);
168 lro_desc->tcp_saw_tstamp = 1;
/* second and third 32-bit words after the fixed TCP header */
169 lro_desc->tcp_rcv_tsval = *(ptr+1);
170 lro_desc->tcp_rcv_tsecr = *(ptr+2);
173 lro_desc->mss = tcp_data_len;
174 lro_desc->active = 1;
176 lro_desc->data_csum = lro_tcp_data_csum(iph, tcph,
/*
 * lro_clear_desc - zero every byte of the descriptor, which also resets its
 * active field to 0.
 */
180 static inline void lro_clear_desc(struct net_lro_desc *lro_desc)
182 memset(lro_desc, 0, sizeof(struct net_lro_desc));
/*
 * lro_add_common - account for one more segment in a running aggregation.
 *
 * lro_desc:     descriptor being extended.
 * iph, tcph:    headers of the segment being added.
 * tcp_data_len: payload length of that segment.
 *
 * Bumps the packet count, grows ip_tot_len and tcp_next_seq by the payload
 * length, takes window and ack from the new segment, refreshes
 * tcp_rcv_tsecr when timestamps are in use, extends data_csum, grows the
 * parent skb's len and data_len, and raises mss if this payload is larger.
 *
 * NOTE(review): the declaration of topt and the remaining arguments of the
 * csum_block_add() call are not present in this listing.
 */
185 static void lro_add_common(struct net_lro_desc *lro_desc, struct iphdr *iph,
186 struct tcphdr *tcph, int tcp_data_len)
188 struct sk_buff *parent = lro_desc->parent;
191 lro_desc->pkt_aggr_cnt++;
192 lro_desc->ip_tot_len += tcp_data_len;
193 lro_desc->tcp_next_seq += tcp_data_len;
194 lro_desc->tcp_window = tcph->window;
195 lro_desc->tcp_ack = tcph->ack_seq;
197 /* don't update tcp_rcv_tsval, would not work with PAWS */
198 if (lro_desc->tcp_saw_tstamp) {
199 topt = (__be32 *) (tcph + 1);
200 lro_desc->tcp_rcv_tsecr = *(topt + 2);
203 lro_desc->data_csum = csum_block_add(lro_desc->data_csum,
204 lro_tcp_data_csum(iph, tcph,
208 parent->len += tcp_data_len;
209 parent->data_len += tcp_data_len;
/* mss tracks the largest payload seen so far */
210 if (tcp_data_len > lro_desc->mss)
211 lro_desc->mss = tcp_data_len;
/*
 * lro_add_packet - append skb to the aggregation described by lro_desc.
 *
 * Updates the descriptor through lro_add_common(), pulls everything but the
 * TCP payload off the front of skb, adds skb's truesize to the parent, and
 * links skb behind last_skb or as the parent's frag_list; skb then becomes
 * last_skb.
 *
 * NOTE(review): the line between the two link assignments is missing from
 * this listing; presumably it is an else, so frag_list is only set for the
 * first appended skb -- confirm.
 */
214 static void lro_add_packet(struct net_lro_desc *lro_desc, struct sk_buff *skb,
215 struct iphdr *iph, struct tcphdr *tcph)
217 struct sk_buff *parent = lro_desc->parent;
218 int tcp_data_len = TCP_PAYLOAD_LENGTH(iph, tcph);
220 lro_add_common(lro_desc, iph, tcph, tcp_data_len);
/* leave exactly tcp_data_len bytes in skb */
222 skb_pull(skb, (skb->len - tcp_data_len));
223 parent->truesize += skb->truesize;
225 if (lro_desc->last_skb)
226 lro_desc->last_skb->next = skb;
228 skb_shinfo(parent)->frag_list = skb;
230 lro_desc->last_skb = skb;
/*
 * lro_check_tcp_conn - compare the connection stored in lro_desc with the
 * headers of a received segment.
 *
 * The condition is true when any of source address, destination address,
 * source port or destination port differs.
 *
 * NOTE(review): the remaining parameters and the return statements are not
 * present in this listing. lro_get_desc() treats a zero result as a match,
 * so presumably a mismatch returns non-zero -- confirm.
 */
234 static int lro_check_tcp_conn(struct net_lro_desc *lro_desc,
238 if ((lro_desc->iph->saddr != iph->saddr) ||
239 (lro_desc->iph->daddr != iph->daddr) ||
240 (lro_desc->tcph->source != tcph->source) ||
241 (lro_desc->tcph->dest != tcph->dest))
/*
 * lro_get_desc - pick a descriptor from lro_arr for the given connection.
 *
 * Two passes over the lro_mgr->max_desc entries are visible: the first looks
 * for an entry for which lro_check_tcp_conn() returns zero, the second for
 * an entry whose active field is clear. The no_desc statistic is
 * incremented on a path that is not fully visible here.
 *
 * NOTE(review): the remaining parameters, the assignment of tmp, the loop
 * exits and the return statement are missing from this listing. Presumably
 * the result is NULL, with no_desc counted, when neither pass finds an
 * entry -- confirm.
 */
246 static struct net_lro_desc *lro_get_desc(struct net_lro_mgr *lro_mgr,
247 struct net_lro_desc *lro_arr,
251 struct net_lro_desc *lro_desc = NULL;
252 struct net_lro_desc *tmp;
253 int max_desc = lro_mgr->max_desc;
/* pass 1: descriptor matching this connection */
256 for (i = 0; i < max_desc; i++) {
259 if (!lro_check_tcp_conn(tmp, iph, tcph)) {
/* pass 2: descriptor that is not in use */
265 for (i = 0; i < max_desc; i++) {
266 if (!lro_arr[i].active) {
267 lro_desc = &lro_arr[i];
272 LRO_INC_STATS(lro_mgr, no_desc);
/*
 * lro_flush - hand the aggregated parent skb of lro_desc to the stack and
 * reset the descriptor.
 *
 * Headers are rewritten only when more than one packet was aggregated.
 * gso_size of the parent is set to the descriptor's mss, the flushed
 * statistic is incremented and the descriptor is cleared.
 *
 * NOTE(review): the line between the two delivery calls is missing from this
 * listing; presumably it is an else, so netif_receive_skb() is used with
 * LRO_F_NAPI and netif_rx() otherwise -- confirm.
 */
277 static void lro_flush(struct net_lro_mgr *lro_mgr,
278 struct net_lro_desc *lro_desc)
280 if (lro_desc->pkt_aggr_cnt > 1)
281 lro_update_tcp_ip_header(lro_desc);
283 skb_shinfo(lro_desc->parent)->gso_size = lro_desc->mss;
285 if (lro_mgr->features & LRO_F_NAPI)
286 netif_receive_skb(lro_desc->parent);
288 netif_rx(lro_desc->parent);
290 LRO_INC_STATS(lro_mgr, flushed);
291 lro_clear_desc(lro_desc);
/*
 * __lro_proc_skb - try to start or extend an aggregation with skb.
 *
 * Visible steps: obtain the IP/TCP headers through the driver-supplied
 * lro_mgr->get_skb_header callback, require both LRO_IPV4 and LRO_TCP in
 * flags, look up a descriptor, then either initialise a new aggregation or,
 * if the sequence number matches tcp_next_seq and the header check runs,
 * append the packet. The descriptor is flushed once pkt_aggr_cnt reaches
 * max_aggr or the parent grows beyond 0xFFFF minus the device MTU.
 *
 * NOTE(review): the remaining parameters, several declarations, and all
 * return / goto statements are missing from this listing, so the control
 * flow between the steps and the return values cannot be confirmed here.
 * The caller lro_receive_skb() passes skb on itself when the result is
 * non-zero.
 */
294 static int __lro_proc_skb(struct net_lro_mgr *lro_mgr, struct sk_buff *skb,
297 struct net_lro_desc *lro_desc;
301 int vlan_hdr_len = 0;
303 if (!lro_mgr->get_skb_header ||
304 lro_mgr->get_skb_header(skb, (void *)&iph, (void *)&tcph,
308 if (!(flags & LRO_IPV4) || !(flags & LRO_TCP))
311 lro_desc = lro_get_desc(lro_mgr, lro_mgr->lro_arr, iph, tcph);
/* VLAN header length is taken into account only without LRO_F_EXTRACT_VLAN_ID */
315 if ((skb->protocol == htons(ETH_P_8021Q)) &&
316 !(lro_mgr->features & LRO_F_EXTRACT_VLAN_ID))
317 vlan_hdr_len = VLAN_HLEN;
319 if (!lro_desc->active) { /* start new lro session */
320 if (lro_tcp_ip_check(iph, tcph, skb->len - vlan_hdr_len, NULL))
323 skb->ip_summed = lro_mgr->ip_summed_aggr;
324 lro_init_desc(lro_desc, skb, iph, tcph);
325 LRO_INC_STATS(lro_mgr, aggregated);
/* segment must continue exactly where the aggregation ended */
329 if (lro_desc->tcp_next_seq != ntohl(tcph->seq))
/*
 * NOTE(review): unlike the check for a new session above, vlan_hdr_len is
 * not subtracted from skb->len here -- confirm this is intended.
 */
332 if (lro_tcp_ip_check(iph, tcph, skb->len, lro_desc))
335 lro_add_packet(lro_desc, skb, iph, tcph);
336 LRO_INC_STATS(lro_mgr, aggregated);
338 if ((lro_desc->pkt_aggr_cnt >= lro_mgr->max_aggr) ||
339 lro_desc->parent->len > (0xFFFF - lro_mgr->dev->mtu))
340 lro_flush(lro_mgr, lro_desc);
344 out2: /* send aggregated SKBs to stack */
345 lro_flush(lro_mgr, lro_desc);
/*
 * lro_receive_skb - exported entry point: offer skb to LRO and, when
 * __lro_proc_skb() returns non-zero, deliver skb to the stack directly.
 *
 * NOTE(review): the remaining parameters and the non-NAPI delivery branch
 * are missing from this listing; presumably netif_rx() is used when
 * LRO_F_NAPI is not set, as in lro_flush() -- confirm.
 */
351 void lro_receive_skb(struct net_lro_mgr *lro_mgr,
355 if (__lro_proc_skb(lro_mgr, skb, priv)) {
356 if (lro_mgr->features & LRO_F_NAPI)
357 netif_receive_skb(skb);
362 EXPORT_SYMBOL(lro_receive_skb);
/*
 * lro_flush_all - exported helper: walk all lro_mgr->max_desc descriptors in
 * lro_mgr->lro_arr and flush each one whose active field is set.
 *
 * NOTE(review): the declaration of i and the closing braces are missing from
 * this listing.
 */
364 void lro_flush_all(struct net_lro_mgr *lro_mgr)
367 struct net_lro_desc *lro_desc = lro_mgr->lro_arr;
369 for (i = 0; i < lro_mgr->max_desc; i++) {
370 if (lro_desc[i].active)
371 lro_flush(lro_mgr, &lro_desc[i]);
374 EXPORT_SYMBOL(lro_flush_all);