net/ipv6/ip6_output.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *      IPv6 output functions
 *      Linux INET6 implementation
 *
 *      Authors:
 *      Pedro Roque             <roque@di.fc.ul.pt>
 *
 *      Based on linux/net/ipv4/ip_output.c
 *
 *      Changes:
 *      A.N.Kuznetsov   :       arithmetic in fragmentation.
 *                              extension headers are implemented.
 *                              route changes now work.
 *                              ip6_forward does not confuse sniffers.
 *                              etc.
 *
 *      H. von Brand    :       Added missing #include <linux/string.h>
 *      Imran Patel     :       frag id should be in NBO
 *      Kazunori MIYAZAWA @USAGI
 *                      :       add ip6_append_data and related functions
 *                              for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
#include <net/ip_tunnels.h>

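/* Final transmit step: loop multicast copies back through the stack when
 * required, honour lwtunnel xmit redirects, then resolve the nexthop
 * neighbour and hand the skb to neigh_output(). Counts OUTNOROUTES and
 * drops the skb if no neighbour entry can be created.
 */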
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);
        struct net_device *dev = dst->dev;
        const struct in6_addr *nexthop;
        struct neighbour *neigh;
        int ret;

        if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
                struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

                if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
                    ((mroute6_is_socket(net, skb) &&
                     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
                     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
                                         &ipv6_hdr(skb)->saddr))) {
                        struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

                        /* Do not check for IFF_ALLMULTI; multicast routing
                         * is not supported in any case.
                         */
                        if (newskb)
                                NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                                        net, sk, newskb, NULL, newskb->dev,
                                        dev_loopback_xmit);

                        if (ipv6_hdr(skb)->hop_limit == 0) {
                                IP6_INC_STATS(net, idev,
                                              IPSTATS_MIB_OUTDISCARDS);
                                kfree_skb(skb);
                                return 0;
                        }
                }

                IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

                if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
                    IPV6_ADDR_SCOPE_NODELOCAL &&
                    !(dev->flags & IFF_LOOPBACK)) {
                        kfree_skb(skb);
                        return 0;
                }
        }

        if (lwtunnel_xmit_redirect(dst->lwtstate)) {
                int res = lwtunnel_xmit(skb);

                if (res < 0 || res == LWTUNNEL_XMIT_DONE)
                        return res;
        }

        rcu_read_lock_bh();
        nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
        neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
        if (unlikely(!neigh))
                neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
        if (!IS_ERR(neigh)) {
                sock_confirm_neigh(skb, neigh);
                ret = neigh_output(neigh, skb, false);
                rcu_read_unlock_bh();
                return ret;
        }
        rcu_read_unlock_bh();

        IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
        kfree_skb(skb);
        return -EINVAL;
}

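/* Software-segment an oversized GSO skb and run each resulting segment
 * through ip6_fragment(); the first error encountered (if any) is the
 * return value.
 */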
static int
ip6_finish_output_gso_slowpath_drop(struct net *net, struct sock *sk,
                                    struct sk_buff *skb, unsigned int mtu)
{
        struct sk_buff *segs, *nskb;
        netdev_features_t features;
        int ret = 0;

        /* Please see corresponding comment in ip_finish_output_gso
         * describing the cases where GSO segment length exceeds the
         * egress MTU.
         */
        features = netif_skb_features(skb);
        segs = skb_gso_segment(skb, features & ~NETIF_F_GSO_MASK);
        if (IS_ERR_OR_NULL(segs)) {
                kfree_skb(skb);
                return -ENOMEM;
        }

        consume_skb(skb);

        skb_list_walk_safe(segs, segs, nskb) {
                int err;

                skb_mark_not_on_list(segs);
                err = ip6_fragment(net, sk, segs, ip6_finish_output2);
                if (err && ret == 0)
                        ret = err;
        }

        return ret;
}

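/* Decide between the GSO slow path, IPv6 fragmentation and the direct
 * ip6_finish_output2() path, based on the dst MTU and per-packet state.
 */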
static int __ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        unsigned int mtu;

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
        /* Policy lookup after SNAT yielded a new policy */
        if (skb_dst(skb)->xfrm) {
                IPCB(skb)->flags |= IPSKB_REROUTED;
                return dst_output(net, sk, skb);
        }
#endif

        mtu = ip6_skb_dst_mtu(skb);
        if (skb_is_gso(skb) && !skb_gso_validate_network_len(skb, mtu))
                return ip6_finish_output_gso_slowpath_drop(net, sk, skb, mtu);

        if ((skb->len > mtu && !skb_is_gso(skb)) ||
            dst_allfrag(skb_dst(skb)) ||
            (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
                return ip6_fragment(net, sk, skb, ip6_finish_output2);
        else
                return ip6_finish_output2(net, sk, skb);
}

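/* Run the cgroup BPF egress program before completing output; NET_XMIT_CN
 * still transmits the packet but reports the congestion verdict to the
 * caller.
 */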
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        int ret;

        ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
        switch (ret) {
        case NET_XMIT_SUCCESS:
                return __ip6_finish_output(net, sk, skb);
        case NET_XMIT_CN:
                return __ip6_finish_output(net, sk, skb) ? : ret;
        default:
                kfree_skb(skb);
                return ret;
        }
}

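/* Output entry point after routing: drops the skb when IPv6 is disabled
 * on the egress device, otherwise traverses the NF_INET_POST_ROUTING hook
 * unless the packet was already rerouted.
 */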
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
        struct net_device *dev = skb_dst(skb)->dev, *indev = skb->dev;
        struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

        skb->protocol = htons(ETH_P_IPV6);
        skb->dev = dev;

        if (unlikely(idev->cnf.disable_ipv6)) {
                IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
                kfree_skb(skb);
                return 0;
        }

        return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
                            net, sk, skb, indev, dev,
                            ip6_finish_output,
                            !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
EXPORT_SYMBOL(ip6_output);

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
        if (!np->autoflowlabel_set)
                return ip6_default_np_autolabel(net);
        else
                return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: the socket lock is not held for SYNACK packets, but the socket
 * might be modified by calls to skb_set_owner_w() and ipv6_local_error(),
 * which use proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
             __u32 mark, struct ipv6_txoptions *opt, int tclass, u32 priority)
{
        struct net *net = sock_net(sk);
        const struct ipv6_pinfo *np = inet6_sk(sk);
        struct in6_addr *first_hop = &fl6->daddr;
        struct dst_entry *dst = skb_dst(skb);
        unsigned int head_room;
        struct ipv6hdr *hdr;
        u8  proto = fl6->flowi6_proto;
        int seg_len = skb->len;
        int hlimit = -1;
        u32 mtu;

        head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
        if (opt)
                head_room += opt->opt_nflen + opt->opt_flen;

        if (unlikely(skb_headroom(skb) < head_room)) {
                struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
                if (!skb2) {
                        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                                      IPSTATS_MIB_OUTDISCARDS);
                        kfree_skb(skb);
                        return -ENOBUFS;
                }
                if (skb->sk)
                        skb_set_owner_w(skb2, skb->sk);
                consume_skb(skb);
                skb = skb2;
        }

        if (opt) {
                seg_len += opt->opt_nflen + opt->opt_flen;

                if (opt->opt_flen)
                        ipv6_push_frag_opts(skb, opt, &proto);

                if (opt->opt_nflen)
                        ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
                                             &fl6->saddr);
        }

        skb_push(skb, sizeof(struct ipv6hdr));
        skb_reset_network_header(skb);
        hdr = ipv6_hdr(skb);

        /*
         *      Fill in the IPv6 header
         */
        if (np)
                hlimit = np->hop_limit;
        if (hlimit < 0)
                hlimit = ip6_dst_hoplimit(dst);

        ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
                                ip6_autoflowlabel(net, np), fl6));

        hdr->payload_len = htons(seg_len);
        hdr->nexthdr = proto;
        hdr->hop_limit = hlimit;

        hdr->saddr = fl6->saddr;
        hdr->daddr = *first_hop;

        skb->protocol = htons(ETH_P_IPV6);
        skb->priority = priority;
        skb->mark = mark;

        mtu = dst_mtu(dst);
        if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
                IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_OUT, skb->len);

                /* if the egress device is enslaved to an L3 master device,
                 * pass the skb to its handler for processing
                 */
                skb = l3mdev_ip6_out((struct sock *)sk, skb);
                if (unlikely(!skb))
                        return 0;

                /* hooks should never assume the socket lock is held,
                 * so we promote our socket to non-const
                 */
                return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
                               net, (struct sock *)sk, skb, NULL, dst->dev,
                               dst_output);
        }

        skb->dev = dst->dev;
        /* ipv6_local_error() does not require the socket lock,
         * so we promote our socket to non-const
         */
        ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

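/* Deliver a Router Alert packet to every raw socket that registered for
 * this alert value via the IPV6_ROUTER_ALERT socket option; returns 1 if
 * the skb was consumed by at least one listener.
 */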
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
        struct ip6_ra_chain *ra;
        struct sock *last = NULL;

        read_lock(&ip6_ra_lock);
        for (ra = ip6_ra_chain; ra; ra = ra->next) {
                struct sock *sk = ra->sk;
                if (sk && ra->sel == sel &&
                    (!sk->sk_bound_dev_if ||
                     sk->sk_bound_dev_if == skb->dev->ifindex)) {
                        struct ipv6_pinfo *np = inet6_sk(sk);

                        if (np && np->rtalert_isolate &&
                            !net_eq(sock_net(sk), dev_net(skb->dev))) {
                                continue;
                        }
                        if (last) {
                                struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
                                if (skb2)
                                        rawv6_rcv(last, skb2);
                        }
                        last = sk;
                }
        }

        if (last) {
                rawv6_rcv(last, skb);
                read_unlock(&ip6_ra_lock);
                return 1;
        }
        read_unlock(&ip6_ra_lock);
        return 0;
}

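/* Decide what to do with a packet addressed to a proxied address: 1 means
 * hand it to local input (unicast neighbour discovery), 0 means forward,
 * -1 means the packet was discarded (link-local destination).
 */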
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        u8 nexthdr = hdr->nexthdr;
        __be16 frag_off;
        int offset;

        if (ipv6_ext_hdr(nexthdr)) {
                offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
                if (offset < 0)
                        return 0;
        } else
                offset = sizeof(struct ipv6hdr);

        if (nexthdr == IPPROTO_ICMPV6) {
                struct icmp6hdr *icmp6;

                if (!pskb_may_pull(skb, (skb_network_header(skb) +
                                         offset + 1 - skb->data)))
                        return 0;

                icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

                switch (icmp6->icmp6_type) {
                case NDISC_ROUTER_SOLICITATION:
                case NDISC_ROUTER_ADVERTISEMENT:
                case NDISC_NEIGHBOUR_SOLICITATION:
                case NDISC_NEIGHBOUR_ADVERTISEMENT:
                case NDISC_REDIRECT:
                        /* Unicast neighbour discovery messages destined to
                         * the proxied address are passed to the input
                         * function.
                         */
                        return 1;
                default:
                        break;
                }
        }

        /*
         * The proxying router can't forward traffic sent to a link-local
         * address, so signal the sender and discard the packet. This
         * behavior is clarified by the MIPv6 specification.
         */
        if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
                dst_link_failure(skb);
                return -1;
        }

        return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
                                     struct sk_buff *skb)
{
        struct dst_entry *dst = skb_dst(skb);

        __IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
        __IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
        if (skb->offload_l3_fwd_mark) {
                consume_skb(skb);
                return 0;
        }
#endif

        skb->tstamp = 0;
        return dst_output(net, sk, skb);
}

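/* Return true when a forwarded packet exceeds @mtu and may not be sent:
 * a conntrack-defragmented packet is judged by its largest original
 * fragment, ignore_df skips the check, and a GSO packet passes as long
 * as every resulting segment fits the network length.
 */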
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
        if (skb->len <= mtu)
                return false;

        /* ipv6 conntrack defrag sets max_frag_size + ignore_df */
        if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
                return true;

        if (skb->ignore_df)
                return false;

        if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
                return false;

        return true;
}

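/* Forwarding path: validates hop limit, xfrm policy and source address,
 * emits redirects and Packet Too Big errors where appropriate, then hands
 * the packet to the NF_INET_FORWARD hook.
 */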
int ip6_forward(struct sk_buff *skb)
{
        struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
        struct dst_entry *dst = skb_dst(skb);
        struct ipv6hdr *hdr = ipv6_hdr(skb);
        struct inet6_skb_parm *opt = IP6CB(skb);
        struct net *net = dev_net(dst->dev);
        u32 mtu;

        if (net->ipv6.devconf_all->forwarding == 0)
                goto error;

        if (skb->pkt_type != PACKET_HOST)
                goto drop;

        if (unlikely(skb->sk))
                goto drop;

        if (skb_warn_if_lro(skb))
                goto drop;

        if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }

        skb_forward_csum(skb);

        /*
         *      We do NOT do any processing on RA packets; we push them to
         *      user level AS IS, without any warranty that the application
         *      will be able to interpret them. The reason is that we
         *      cannot make anything clever here.
         *
         *      We are not the end node, so if the packet contains
         *      AH/ESP we cannot do anything.
         *      Defragmentation would also be a mistake; RA packets
         *      cannot be fragmented, because there is no guarantee
         *      that different fragments will go along one path. --ANK
         */
        if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
                if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
                        return 0;
        }

        /*
         *      check and decrement ttl
         */
        if (hdr->hop_limit <= 1) {
                icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

                kfree_skb(skb);
                return -ETIMEDOUT;
        }

        /* XXX: idev->cnf.proxy_ndp? */
        if (net->ipv6.devconf_all->proxy_ndp &&
            pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
                int proxied = ip6_forward_proxy_check(skb);
                if (proxied > 0)
                        return ip6_input(skb);
                else if (proxied < 0) {
                        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                        goto drop;
                }
        }

        if (!xfrm6_route_forward(skb)) {
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
                goto drop;
        }
        dst = skb_dst(skb);

        /* The IPv6 specs say nothing about it, but it is clear that we
         * cannot send redirects for source routed frames.
         * We also don't send redirects for frames decapsulated from IPsec.
         */
        if (IP6CB(skb)->iif == dst->dev->ifindex &&
            opt->srcrt == 0 && !skb_sec_path(skb)) {
                struct in6_addr *target = NULL;
                struct inet_peer *peer;
                struct rt6_info *rt;

                /*
                 *      incoming and outgoing devices are the same,
                 *      so send a redirect.
                 */

                rt = (struct rt6_info *) dst;
                if (rt->rt6i_flags & RTF_GATEWAY)
                        target = &rt->rt6i_gateway;
                else
                        target = &hdr->daddr;

                peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

                /* Limit redirects both by destination (here)
                 * and by source (inside ndisc_send_redirect)
                 */
                if (inet_peer_xrlim_allow(peer, 1*HZ))
                        ndisc_send_redirect(skb, target);
                if (peer)
                        inet_putpeer(peer);
        } else {
                int addrtype = ipv6_addr_type(&hdr->saddr);

                /* This check is security critical. */
                if (addrtype == IPV6_ADDR_ANY ||
                    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
                        goto error;
                if (addrtype & IPV6_ADDR_LINKLOCAL) {
                        icmpv6_send(skb, ICMPV6_DEST_UNREACH,
                                    ICMPV6_NOT_NEIGHBOUR, 0);
                        goto error;
                }
        }

        mtu = ip6_dst_mtu_forward(dst);
        if (mtu < IPV6_MIN_MTU)
                mtu = IPV6_MIN_MTU;

        if (ip6_pkt_too_big(skb, mtu)) {
                /* Again, force the OUTPUT device used as source address */
                skb->dev = dst->dev;
                icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                __IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_FRAGFAILS);
                kfree_skb(skb);
                return -EMSGSIZE;
        }

        if (skb_cow(skb, dst->dev->hard_header_len)) {
                __IP6_INC_STATS(net, ip6_dst_idev(dst),
                                IPSTATS_MIB_OUTDISCARDS);
                goto drop;
        }

        hdr = ipv6_hdr(skb);

        /* Mangling the hop limit is delayed to the point after skb COW */

        hdr->hop_limit--;

        return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
                       net, NULL, skb, skb->dev, dst->dev,
                       ip6_forward_finish);

error:
        __IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
        kfree_skb(skb);
        return -EINVAL;
}

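/* Copy routing and bookkeeping state from the original skb to a freshly
 * built fragment so that each fragment is handled like the original.
 */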
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
        to->pkt_type = from->pkt_type;
        to->priority = from->priority;
        to->protocol = from->protocol;
        skb_dst_drop(to);
        skb_dst_set(to, dst_clone(skb_dst(from)));
        to->dev = from->dev;
        to->mark = from->mark;

        skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
        to->tc_index = from->tc_index;
#endif
        nf_copy(to, from);
        skb_ext_copy(to, from);
        skb_copy_secmark(to, from);
}

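/* Fast-path fragmentation over an existing frag_list: ip6_fraglist_init()
 * turns the head skb into the first fragment and ip6_fraglist_prepare()
 * rewrites each queued fragment in turn. The canonical caller loop is the
 * frag_list branch of ip6_fragment() below, in outline:
 *
 *	err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id, &iter);
 *	for (;;) {
 *		if (iter.frag)
 *			ip6_fraglist_prepare(skb, &iter);
 *		err = output(net, sk, skb);
 *		if (err || !iter.frag)
 *			break;
 *		skb = ip6_fraglist_next(&iter);
 *	}
 *	kfree(iter.tmp_hdr);
 */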
int ip6_fraglist_init(struct sk_buff *skb, unsigned int hlen, u8 *prevhdr,
                      u8 nexthdr, __be32 frag_id,
                      struct ip6_fraglist_iter *iter)
{
        unsigned int first_len;
        struct frag_hdr *fh;

        /* BUILD HEADER */
        *prevhdr = NEXTHDR_FRAGMENT;
        iter->tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
        if (!iter->tmp_hdr)
                return -ENOMEM;

        iter->frag = skb_shinfo(skb)->frag_list;
        skb_frag_list_init(skb);

        iter->offset = 0;
        iter->hlen = hlen;
        iter->frag_id = frag_id;
        iter->nexthdr = nexthdr;

        __skb_pull(skb, hlen);
        fh = __skb_push(skb, sizeof(struct frag_hdr));
        __skb_push(skb, hlen);
        skb_reset_network_header(skb);
        memcpy(skb_network_header(skb), iter->tmp_hdr, hlen);

        fh->nexthdr = nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(IP6_MF);
        fh->identification = frag_id;

        first_len = skb_pagelen(skb);
        skb->data_len = first_len - skb_headlen(skb);
        skb->len = first_len;
        ipv6_hdr(skb)->payload_len = htons(first_len - sizeof(struct ipv6hdr));

        return 0;
}
EXPORT_SYMBOL(ip6_fraglist_init);

void ip6_fraglist_prepare(struct sk_buff *skb,
                          struct ip6_fraglist_iter *iter)
{
        struct sk_buff *frag = iter->frag;
        unsigned int hlen = iter->hlen;
        struct frag_hdr *fh;

        frag->ip_summed = CHECKSUM_NONE;
        skb_reset_transport_header(frag);
        fh = __skb_push(frag, sizeof(struct frag_hdr));
        __skb_push(frag, hlen);
        skb_reset_network_header(frag);
        memcpy(skb_network_header(frag), iter->tmp_hdr, hlen);
        iter->offset += skb->len - hlen - sizeof(struct frag_hdr);
        fh->nexthdr = iter->nexthdr;
        fh->reserved = 0;
        fh->frag_off = htons(iter->offset);
        if (frag->next)
                fh->frag_off |= htons(IP6_MF);
        fh->identification = iter->frag_id;
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));
        ip6_copy_metadata(frag, skb);
}
EXPORT_SYMBOL(ip6_fraglist_prepare);

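/* Slow-path fragmentation: ip6_frag_init() captures the per-datagram
 * state and ip6_frag_next() carves off one fragment per call. The typical
 * caller loop is the slow path of ip6_fragment() below, in outline:
 *
 *	ip6_frag_init(skb, hlen, mtu, tailroom, hroom, prevhdr, nexthdr,
 *		      frag_id, &state);
 *	while (state.left > 0) {
 *		frag = ip6_frag_next(skb, &state);
 *		if (IS_ERR(frag))
 *			break;
 *		err = output(net, sk, frag);
 *	}
 */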
void ip6_frag_init(struct sk_buff *skb, unsigned int hlen, unsigned int mtu,
                   unsigned short needed_tailroom, int hdr_room, u8 *prevhdr,
                   u8 nexthdr, __be32 frag_id, struct ip6_frag_state *state)
{
        state->prevhdr = prevhdr;
        state->nexthdr = nexthdr;
        state->frag_id = frag_id;

        state->hlen = hlen;
        state->mtu = mtu;

        state->left = skb->len - hlen;  /* Space per frame */
        state->ptr = hlen;              /* Where to start from */

        state->hroom = hdr_room;
        state->troom = needed_tailroom;

        state->offset = 0;
}
EXPORT_SYMBOL(ip6_frag_init);

struct sk_buff *ip6_frag_next(struct sk_buff *skb, struct ip6_frag_state *state)
{
        u8 *prevhdr = state->prevhdr, *fragnexthdr_offset;
        struct sk_buff *frag;
        struct frag_hdr *fh;
        unsigned int len;

        len = state->left;
        /* IF: it doesn't fit, use 'mtu' - the data space left */
        if (len > state->mtu)
                len = state->mtu;
        /* IF: we are not sending up to and including the packet end
         * then align the next start on an eight byte boundary
         */
        if (len < state->left)
                len &= ~7;

        /* Allocate buffer */
        frag = alloc_skb(len + state->hlen + sizeof(struct frag_hdr) +
                         state->hroom + state->troom, GFP_ATOMIC);
        if (!frag)
                return ERR_PTR(-ENOMEM);

        /*
         *      Set up data on packet
         */

        ip6_copy_metadata(frag, skb);
        skb_reserve(frag, state->hroom);
        skb_put(frag, len + state->hlen + sizeof(struct frag_hdr));
        skb_reset_network_header(frag);
        fh = (struct frag_hdr *)(skb_network_header(frag) + state->hlen);
        frag->transport_header = (frag->network_header + state->hlen +
                                  sizeof(struct frag_hdr));

        /*
         *      Charge the memory for the fragment to any owner
         *      it might possess
         */
        if (skb->sk)
                skb_set_owner_w(frag, skb->sk);

        /*
         *      Copy the packet header into the new buffer.
         */
        skb_copy_from_linear_data(skb, skb_network_header(frag), state->hlen);

        fragnexthdr_offset = skb_network_header(frag);
        fragnexthdr_offset += prevhdr - skb_network_header(skb);
        *fragnexthdr_offset = NEXTHDR_FRAGMENT;

        /*
         *      Build fragment header.
         */
        fh->nexthdr = state->nexthdr;
        fh->reserved = 0;
        fh->identification = state->frag_id;

        /*
         *      Copy a block of the IP datagram.
         */
        BUG_ON(skb_copy_bits(skb, state->ptr, skb_transport_header(frag),
                             len));
        state->left -= len;

        fh->frag_off = htons(state->offset);
        if (state->left > 0)
                fh->frag_off |= htons(IP6_MF);
        ipv6_hdr(frag)->payload_len = htons(frag->len - sizeof(struct ipv6hdr));

        state->ptr += len;
        state->offset += len;

        return frag;
}
EXPORT_SYMBOL(ip6_frag_next);

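/* Fragment an IPv6 datagram to fit the path MTU, preferring the zero-copy
 * frag_list fast path when the skb geometry allows it and falling back to
 * the allocate-and-copy slow path otherwise.
 */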
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
                 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
        struct sk_buff *frag;
        struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
        struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
                                inet6_sk(skb->sk) : NULL;
        struct ip6_frag_state state;
        unsigned int mtu, hlen, nexthdr_offset;
        ktime_t tstamp = skb->tstamp;
        int hroom, err = 0;
        __be32 frag_id;
        u8 *prevhdr, nexthdr = 0;

        err = ip6_find_1stfragopt(skb, &prevhdr);
        if (err < 0)
                goto fail;
        hlen = err;
        nexthdr = *prevhdr;
        nexthdr_offset = prevhdr - skb_network_header(skb);

        mtu = ip6_skb_dst_mtu(skb);

        /* We must not fragment if the socket is set to force MTU discovery
         * or if the skb is not generated by a local socket.
         */
        if (unlikely(!skb->ignore_df && skb->len > mtu))
                goto fail_toobig;

        if (IP6CB(skb)->frag_max_size) {
                if (IP6CB(skb)->frag_max_size > mtu)
                        goto fail_toobig;

                /* don't send fragments larger than what we received */
                mtu = IP6CB(skb)->frag_max_size;
                if (mtu < IPV6_MIN_MTU)
                        mtu = IPV6_MIN_MTU;
        }

        if (np && np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < hlen + sizeof(struct frag_hdr) + 8)
                goto fail_toobig;
        mtu -= hlen + sizeof(struct frag_hdr);

        frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
                                    &ipv6_hdr(skb)->saddr);

        if (skb->ip_summed == CHECKSUM_PARTIAL &&
            (err = skb_checksum_help(skb)))
                goto fail;

        prevhdr = skb_network_header(skb) + nexthdr_offset;
        hroom = LL_RESERVED_SPACE(rt->dst.dev);
        if (skb_has_frag_list(skb)) {
                unsigned int first_len = skb_pagelen(skb);
                struct ip6_fraglist_iter iter;
                struct sk_buff *frag2;

                if (first_len - hlen > mtu ||
                    ((first_len - hlen) & 7) ||
                    skb_cloned(skb) ||
                    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
                        goto slow_path;

                skb_walk_frags(skb, frag) {
                        /* Correct geometry. */
                        if (frag->len > mtu ||
                            ((frag->len & 7) && frag->next) ||
                            skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
                                goto slow_path_clean;

                        /* Partially cloned skb? */
                        if (skb_shared(frag))
                                goto slow_path_clean;

                        BUG_ON(frag->sk);
                        if (skb->sk) {
                                frag->sk = skb->sk;
                                frag->destructor = sock_wfree;
                        }
                        skb->truesize -= frag->truesize;
                }

                err = ip6_fraglist_init(skb, hlen, prevhdr, nexthdr, frag_id,
                                        &iter);
                if (err < 0)
                        goto fail;

                for (;;) {
                        /* Prepare the header of the next frame,
                         * before the previous one is sent.
                         */
                        if (iter.frag)
                                ip6_fraglist_prepare(skb, &iter);

                        skb->tstamp = tstamp;
                        err = output(net, sk, skb);
                        if (!err)
                                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                              IPSTATS_MIB_FRAGCREATES);

                        if (err || !iter.frag)
                                break;

                        skb = ip6_fraglist_next(&iter);
                }

                kfree(iter.tmp_hdr);

                if (err == 0) {
                        IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                                      IPSTATS_MIB_FRAGOKS);
                        return 0;
                }

                kfree_skb_list(iter.frag);

                IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
                              IPSTATS_MIB_FRAGFAILS);
                return err;

slow_path_clean:
                skb_walk_frags(skb, frag2) {
                        if (frag2 == frag)
                                break;
                        frag2->sk = NULL;
                        frag2->destructor = NULL;
                        skb->truesize += frag2->truesize;
                }
        }

slow_path:
        /*
         *      Fragment the datagram.
         */

        ip6_frag_init(skb, hlen, mtu, rt->dst.dev->needed_tailroom,
                      LL_RESERVED_SPACE(rt->dst.dev), prevhdr, nexthdr, frag_id,
                      &state);

        /*
         *      Keep copying data until we run out.
         */

        while (state.left > 0) {
                frag = ip6_frag_next(skb, &state);
                if (IS_ERR(frag)) {
                        err = PTR_ERR(frag);
                        goto fail;
                }

                /*
                 *      Put this fragment into the sending queue.
                 */
                frag->tstamp = tstamp;
                err = output(net, sk, frag);
                if (err)
                        goto fail;

                IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                              IPSTATS_MIB_FRAGCREATES);
        }
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGOKS);
        consume_skb(skb);
        return err;

fail_toobig:
        if (skb->sk && dst_allfrag(skb_dst(skb)))
                sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

        icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
        err = -EMSGSIZE;

fail:
        IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
                      IPSTATS_MIB_FRAGFAILS);
        kfree_skb(skb);
        return err;
}

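/* Return non-zero when the cached route's key can no longer be validated
 * against the flow's address, i.e. the socket's cached dst must not be
 * reused.
 */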
static inline int ip6_rt_check(const struct rt6key *rt_key,
                               const struct in6_addr *fl_addr,
                               const struct in6_addr *addr_cache)
{
        return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
                (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
                                          struct dst_entry *dst,
                                          const struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        struct rt6_info *rt;

        if (!dst)
                goto out;

        if (dst->ops->family != AF_INET6) {
                dst_release(dst);
                return NULL;
        }

        rt = (struct rt6_info *)dst;
        /* Yes, checking route validity in the unconnected case is not
         * very simple. Take into account that we do not support routing
         * by source, TOS, and MSG_DONTROUTE          --ANK (980726)
         *
         * 1. ip6_rt_check(): If the route was a host route, check that
         *    the cached destination is current. If it is a network route,
         *    we still may check its validity using the saved pointer to
         *    the last used address: daddr_cache. We do not want to save
         *    the whole address now (because the main consumer of this
         *    service is tcp, which does not have this problem), so this
         *    last trick works only on connected sockets.
         * 2. oif also should be the same.
         */
        if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
            ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
           (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
              (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
                dst_release(dst);
                dst = NULL;
        }

out:
        return dst;
}

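/* Core of the dst lookup helpers: resolves a source address when the flow
 * has none, performs the route lookup, and (with optimistic DAD) retries
 * via the default router when the nexthop neighbour is not yet valid.
 */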
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
                               struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        struct neighbour *n;
        struct rt6_info *rt;
#endif
        int err;
        int flags = 0;

        /* The correct way to handle this would be to do
         * ip6_route_get_saddr, and then ip6_route_output; however,
         * the route-specific preferred source forces the
         * ip6_route_output call _before_ ip6_route_get_saddr.
         *
         * In source specific routing (no src=any default route),
         * ip6_route_output will fail given src=any saddr, though, so
         * that's why we try it again later.
         */
        if (ipv6_addr_any(&fl6->saddr)) {
                struct fib6_info *from;
                struct rt6_info *rt;

                *dst = ip6_route_output(net, sk, fl6);
                rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

                rcu_read_lock();
                from = rt ? rcu_dereference(rt->from) : NULL;
                err = ip6_route_get_saddr(net, from, &fl6->daddr,
                                          sk ? inet6_sk(sk)->srcprefs : 0,
                                          &fl6->saddr);
                rcu_read_unlock();

                if (err)
                        goto out_err_release;

                /* If we had an erroneous initial result, pretend it
                 * never existed and let the SA-enabled version take
                 * over.
                 */
                if ((*dst)->error) {
                        dst_release(*dst);
                        *dst = NULL;
                }

                if (fl6->flowi6_oif)
                        flags |= RT6_LOOKUP_F_IFACE;
        }

        if (!*dst)
                *dst = ip6_route_output_flags(net, sk, fl6, flags);

        err = (*dst)->error;
        if (err)
                goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
        /*
         * If the dst entry we've looked up has a neighbour entry that
         * is in the INCOMPLETE state and the src address from the flow
         * is marked as OPTIMISTIC, we release the found dst entry and
         * replace it instead with the dst entry of the nexthop router.
         */
        rt = (struct rt6_info *) *dst;
        rcu_read_lock_bh();
        n = __ipv6_neigh_lookup_noref(rt->dst.dev,
                                      rt6_nexthop(rt, &fl6->daddr));
        err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
        rcu_read_unlock_bh();

        if (err) {
                struct inet6_ifaddr *ifp;
                struct flowi6 fl_gw6;
                int redirect;

                ifp = ipv6_get_ifaddr(net, &fl6->saddr,
                                      (*dst)->dev, 1);

                redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
                if (ifp)
                        in6_ifa_put(ifp);

                if (redirect) {
                        /*
                         * We need to get the dst entry for the
                         * default router instead
                         */
                        dst_release(*dst);
                        memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
                        memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
                        *dst = ip6_route_output(net, sk, &fl_gw6);
                        err = (*dst)->error;
                        if (err)
                                goto out_err_release;
                }
        }
#endif
        if (ipv6_addr_v4mapped(&fl6->saddr) &&
            !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
                err = -EAFNOSUPPORT;
                goto out_err_release;
        }

        return 0;

out_err_release:
        dst_release(*dst);
        *dst = NULL;

        if (err == -ENETUNREACH)
                IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
        return err;
}

/**
 *      ip6_dst_lookup - perform route lookup on flow
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @dst: pointer to dst_entry * for result
 *      @fl6: flow to lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
                   struct flowi6 *fl6)
{
        *dst = NULL;
        return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *      ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *      @net: Network namespace to perform lookup in
 *      @sk: socket which provides route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *
 *      This function performs a route lookup on the given flow.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_dst_lookup_flow(struct net *net, const struct sock *sk, struct flowi6 *fl6,
                                      const struct in6_addr *final_dst)
{
        struct dst_entry *dst = NULL;
        int err;

        err = ip6_dst_lookup_tail(net, sk, &dst, fl6);
        if (err)
                return ERR_PTR(err);
        if (final_dst)
                fl6->daddr = *final_dst;

        return xfrm_lookup_route(net, dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *      ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *      @sk: socket which provides the dst cache and route info
 *      @fl6: flow to lookup
 *      @final_dst: final destination address for ipsec lookup
 *      @connected: whether @sk is connected or not
 *
 *      This function performs a route lookup on the given flow with the
 *      possibility of using the cached route in the socket if it is valid.
 *      It will take the socket dst lock when operating on the dst cache.
 *      As a result, this function can only be used in process context.
 *
 *      In addition, for a connected socket, cache the dst in the socket
 *      if the current cache is not valid.
 *
 *      It returns a valid dst pointer on success, or a pointer encoded
 *      error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
                                         const struct in6_addr *final_dst,
                                         bool connected)
{
        struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

        dst = ip6_sk_dst_check(sk, dst, fl6);
        if (dst)
                return dst;

        dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_dst);
        if (connected && !IS_ERR(dst))
                ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

        return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

/**
 *      ip6_dst_lookup_tunnel - perform route lookup on tunnel
 *      @skb: Packet for which lookup is done
 *      @dev: Tunnel device
 *      @net: Network namespace of tunnel device
 *      @sock: Socket which provides route info
 *      @saddr: Memory to store the src ip address
 *      @info: Tunnel information
 *      @protocol: IP protocol
 *      @use_cache: Flag to enable cache usage
 *
 *      This function performs a route lookup on a tunnel.
 *
 *      It returns a valid dst pointer and stores the src address to be
 *      used in the tunnel in param saddr on success, else a pointer
 *      encoded error code.
 */
struct dst_entry *ip6_dst_lookup_tunnel(struct sk_buff *skb,
                                        struct net_device *dev,
                                        struct net *net,
                                        struct socket *sock,
                                        struct in6_addr *saddr,
                                        const struct ip_tunnel_info *info,
                                        u8 protocol,
                                        bool use_cache)
{
        struct dst_entry *dst = NULL;
#ifdef CONFIG_DST_CACHE
        struct dst_cache *dst_cache;
#endif
        struct flowi6 fl6;
        __u8 prio;

#ifdef CONFIG_DST_CACHE
        dst_cache = (struct dst_cache *)&info->dst_cache;
        if (use_cache) {
                dst = dst_cache_get_ip6(dst_cache, saddr);
                if (dst)
                        return dst;
        }
#endif
        memset(&fl6, 0, sizeof(fl6));
        fl6.flowi6_mark = skb->mark;
        fl6.flowi6_proto = protocol;
        fl6.daddr = info->key.u.ipv6.dst;
        fl6.saddr = info->key.u.ipv6.src;
        prio = info->key.tos;
        fl6.flowlabel = ip6_make_flowinfo(RT_TOS(prio),
                                          info->key.label);

        dst = ipv6_stub->ipv6_dst_lookup_flow(net, sock->sk, &fl6,
                                              NULL);
        if (IS_ERR(dst)) {
                netdev_dbg(dev, "no route to %pI6\n", &fl6.daddr);
                return ERR_PTR(-ENETUNREACH);
        }
        if (dst->dev == dev) { /* is this necessary? */
                netdev_dbg(dev, "circular route to %pI6\n", &fl6.daddr);
                dst_release(dst);
                return ERR_PTR(-ELOOP);
        }
#ifdef CONFIG_DST_CACHE
        if (use_cache)
                dst_cache_set_ip6(dst_cache, dst, &fl6.saddr);
#endif
        *saddr = fl6.saddr;
        return dst;
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_tunnel);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
                                               gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
                                                gfp_t gfp)
{
        return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static void ip6_append_data_mtu(unsigned int *mtu,
                                int *maxfraglen,
                                unsigned int fragheaderlen,
                                struct sk_buff *skb,
                                struct rt6_info *rt,
                                unsigned int orig_mtu)
{
        if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
                if (!skb) {
                        /* first fragment, reserve header_len */
                        *mtu = orig_mtu - rt->dst.header_len;

                } else {
                        /*
                         * this fragment is not the first, so the header
                         * space is regarded as data space.
                         */
                        *mtu = orig_mtu;
                }
                *maxfraglen = ((*mtu - fragheaderlen) & ~7)
                              + fragheaderlen - sizeof(struct frag_hdr);
        }
}

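/* Set up the cork for ip6_append_data(): duplicate the tx options, pin the
 * route, and derive the fragment size from the path MTU and socket
 * settings. Returns 0 or a negative errno.
 */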
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
                          struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
                          struct rt6_info *rt, struct flowi6 *fl6)
{
        struct ipv6_pinfo *np = inet6_sk(sk);
        unsigned int mtu;
        struct ipv6_txoptions *opt = ipc6->opt;

        /*
         * setup for corking
         */
        if (opt) {
                if (WARN_ON(v6_cork->opt))
                        return -EINVAL;

                v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
                if (unlikely(!v6_cork->opt))
                        return -ENOBUFS;

                v6_cork->opt->tot_len = sizeof(*opt);
                v6_cork->opt->opt_flen = opt->opt_flen;
                v6_cork->opt->opt_nflen = opt->opt_nflen;

                v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
                                                    sk->sk_allocation);
                if (opt->dst0opt && !v6_cork->opt->dst0opt)
                        return -ENOBUFS;

                v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
                                                    sk->sk_allocation);
                if (opt->dst1opt && !v6_cork->opt->dst1opt)
                        return -ENOBUFS;

                v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
                                                   sk->sk_allocation);
                if (opt->hopopt && !v6_cork->opt->hopopt)
                        return -ENOBUFS;

                v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
                                                    sk->sk_allocation);
                if (opt->srcrt && !v6_cork->opt->srcrt)
                        return -ENOBUFS;

                /* need source address above miyazawa */
        }
        dst_hold(&rt->dst);
        cork->base.dst = &rt->dst;
        cork->fl.u.ip6 = *fl6;
        v6_cork->hop_limit = ipc6->hlimit;
        v6_cork->tclass = ipc6->tclass;
        if (rt->dst.flags & DST_XFRM_TUNNEL)
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
        else
                mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
                        READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
        if (np->frag_size < mtu) {
                if (np->frag_size)
                        mtu = np->frag_size;
        }
        if (mtu < IPV6_MIN_MTU)
                return -EINVAL;
        cork->base.fragsize = mtu;
        cork->base.gso_size = ipc6->gso_size;
        cork->base.tx_flags = 0;
        cork->base.mark = ipc6->sockc.mark;
        sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

        if (dst_allfrag(xfrm_dst_path(&rt->dst)))
                cork->base.flags |= IPCORK_ALLFRAG;
        cork->base.length = 0;

        cork->base.transmit_time = ipc6->sockc.transmit_time;

        return 0;
}

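/* Append user data to the per-socket transmit queue, growing the last skb
 * or allocating new ones sized to the (GSO-aware) MTU; the queued packets
 * are later flushed by the ip6_push_pending_frames() path.
 */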
1420 static int __ip6_append_data(struct sock *sk,
1421                              struct flowi6 *fl6,
1422                              struct sk_buff_head *queue,
1423                              struct inet_cork *cork,
1424                              struct inet6_cork *v6_cork,
1425                              struct page_frag *pfrag,
1426                              int getfrag(void *from, char *to, int offset,
1427                                          int len, int odd, struct sk_buff *skb),
1428                              void *from, int length, int transhdrlen,
1429                              unsigned int flags, struct ipcm6_cookie *ipc6)
1430 {
1431         struct sk_buff *skb, *skb_prev = NULL;
1432         unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
1433         struct ubuf_info *uarg = NULL;
1434         int exthdrlen = 0;
1435         int dst_exthdrlen = 0;
1436         int hh_len;
1437         int copy;
1438         int err;
1439         int offset = 0;
1440         u32 tskey = 0;
1441         struct rt6_info *rt = (struct rt6_info *)cork->dst;
1442         struct ipv6_txoptions *opt = v6_cork->opt;
1443         int csummode = CHECKSUM_NONE;
1444         unsigned int maxnonfragsize, headersize;
1445         unsigned int wmem_alloc_delta = 0;
1446         bool paged, extra_uref = false;
1447
1448         skb = skb_peek_tail(queue);
1449         if (!skb) {
1450                 exthdrlen = opt ? opt->opt_flen : 0;
1451                 dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
1452         }
1453
1454         paged = !!cork->gso_size;
1455         mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
1456         orig_mtu = mtu;
1457
1458         if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
1459             sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
1460                 tskey = sk->sk_tskey++;
1461
1462         hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1463
1464         fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1465                         (opt ? opt->opt_nflen : 0);
1466         maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
1467                      sizeof(struct frag_hdr);
1468
1469         headersize = sizeof(struct ipv6hdr) +
1470                      (opt ? opt->opt_flen + opt->opt_nflen : 0) +
1471                      (dst_allfrag(&rt->dst) ?
1472                       sizeof(struct frag_hdr) : 0) +
1473                      rt->rt6i_nfheader_len;
1474
1475         /* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
1476          * in the first fragment
1477          */
1478         if (headersize + transhdrlen > mtu)
1479                 goto emsgsize;
1480
1481         if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
1482             (sk->sk_protocol == IPPROTO_UDP ||
1483              sk->sk_protocol == IPPROTO_RAW)) {
1484                 ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
1485                                 sizeof(struct ipv6hdr));
1486                 goto emsgsize;
1487         }
1488
1489         if (ip6_sk_ignore_df(sk))
1490                 maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
1491         else
1492                 maxnonfragsize = mtu;
1493
1494         if (cork->length + length > maxnonfragsize - headersize) {
1495 emsgsize:
1496                 pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
1497                 ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
1498                 return -EMSGSIZE;
1499         }
1500
1501         /* Use CHECKSUM_PARTIAL only when there are no extension headers
1502          * and we are not going to fragment
1503          */
1504         if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
1505             headersize == sizeof(struct ipv6hdr) &&
1506             length <= mtu - headersize &&
1507             (!(flags & MSG_MORE) || cork->gso_size) &&
1508             rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
1509                 csummode = CHECKSUM_PARTIAL;
1510
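             /* MSG_ZEROCOPY: pin the sender's pages instead of copying.
              * Without scatter/gather plus checksum offload on the device
              * we fall back to copying (uarg->zerocopy = 0) but keep the
              * uarg for the completion notification.
              */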
1511         if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
1512                 uarg = msg_zerocopy_realloc(sk, length, skb_zcopy(skb));
1513                 if (!uarg)
1514                         return -ENOBUFS;
1515                 extra_uref = !skb_zcopy(skb);   /* only ref on new uarg */
1516                 if (rt->dst.dev->features & NETIF_F_SG &&
1517                     csummode == CHECKSUM_PARTIAL) {
1518                         paged = true;
1519                 } else {
1520                         uarg->zerocopy = 0;
1521                         skb_zcopy_set(skb, uarg, &extra_uref);
1522                 }
1523         }
1524
1525         /*
1526          * Let's try using as much space as possible.
1527          * Use MTU if total length of the message fits into the MTU.
1528          * Otherwise, we need to reserve fragment header and
1529          * fragment alignment (= 8-15 octets, in total).
1530          *
1531          * Note that we may need to "move" the data from the tail
1532          * of the buffer to the new fragment when we split
1533          * the message.
1534          *
1535          * FIXME: It may be fragmented into multiple chunks
1536          *        at once if non-fragmentable extension headers
1537          *        are too large.
1538          * --yoshfuji
1539          */
1540
1541         cork->length += length;
1542         if (!skb)
1543                 goto alloc_new_skb;
1544
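             /* Main copy loop: top up the skb at the tail of the queue,
              * jumping to alloc_new_skb whenever the current fragment is
              * full (or no skb exists yet).
              */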
1545         while (length > 0) {
1546                 /* Check if the remaining data fits into the current packet. */
1547                 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1548                 if (copy < length)
1549                         copy = maxfraglen - skb->len;
1550
1551                 if (copy <= 0) {
1552                         char *data;
1553                         unsigned int datalen;
1554                         unsigned int fraglen;
1555                         unsigned int fraggap;
1556                         unsigned int alloclen, alloc_extra;
1557                         unsigned int pagedlen;
1558 alloc_new_skb:
1559                         /* There's no room in the current skb */
1560                         if (skb)
1561                                 fraggap = skb->len - maxfraglen;
1562                         else
1563                                 fraggap = 0;
1564                         /* update mtu and maxfraglen if necessary */
1565                         if (!skb || !skb_prev)
1566                                 ip6_append_data_mtu(&mtu, &maxfraglen,
1567                                                     fragheaderlen, skb, rt,
1568                                                     orig_mtu);
1569
1570                         skb_prev = skb;
1571
1572                         /*
1573                          * If remaining data exceeds the mtu,
1574                          * we know we need more fragment(s).
1575                          */
1576                         datalen = length + fraggap;
1577
1578                         if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1579                                 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1580                         fraglen = datalen + fragheaderlen;
1581                         pagedlen = 0;
1582
1583                         alloc_extra = hh_len;
1584                         alloc_extra += dst_exthdrlen;
1585                         alloc_extra += rt->dst.trailer_len;
1586
1587                         /* We just reserve space for the fragment header.
1588                          * Note: this may be overallocation if the message
1589                          * (without MSG_MORE) fits into the MTU.
1590                          */
1591                         alloc_extra += sizeof(struct frag_hdr);
1592
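                             /* Choose the linear allocation: a full MTU when
                              * we must keep copying into one skb (MSG_MORE
                              * without S/G), the whole fragment when it fits
                              * a reasonable linear skb, otherwise a small
                              * linear part with the remainder (pagedlen)
                              * going into page frags.
                              */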
1593                         if ((flags & MSG_MORE) &&
1594                             !(rt->dst.dev->features&NETIF_F_SG))
1595                                 alloclen = mtu;
1596                         else if (!paged &&
1597                                  (fraglen + alloc_extra < SKB_MAX_ALLOC ||
1598                                   !(rt->dst.dev->features & NETIF_F_SG)))
1599                                 alloclen = fraglen;
1600                         else {
1601                                 alloclen = min_t(int, fraglen, MAX_HEADER);
1602                                 pagedlen = fraglen - alloclen;
1603                         }
1604                         alloclen += alloc_extra;
1605
1606                         if (datalen != length + fraggap) {
1607                                 /*
1608                                  * this is not the last fragment, so the
1609                                  * trailer space is regarded as data space.
1610                                  */
1611                                 datalen += rt->dst.trailer_len;
1612                         }
1613
1614                         fraglen = datalen + fragheaderlen;
1615
1616                         copy = datalen - transhdrlen - fraggap - pagedlen;
1617                         if (copy < 0) {
1618                                 err = -EINVAL;
1619                                 goto error;
1620                         }
1621                         if (transhdrlen) {
1622                                 skb = sock_alloc_send_skb(sk, alloclen,
1623                                                 (flags & MSG_DONTWAIT), &err);
1624                         } else {
1625                                 skb = NULL;
1626                                 if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
1627                                     2 * sk->sk_sndbuf)
1628                                         skb = alloc_skb(alloclen,
1629                                                         sk->sk_allocation);
1630                                 if (unlikely(!skb))
1631                                         err = -ENOBUFS;
1632                         }
1633                         if (!skb)
1634                                 goto error;
1635                         /*
1636                          *      Fill in the control structures
1637                          */
1638                         skb->protocol = htons(ETH_P_IPV6);
1639                         skb->ip_summed = csummode;
1640                         skb->csum = 0;
1641                         /* reserve room for the link-layer, fragment and IPsec headers */
1642                         skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1643                                     dst_exthdrlen);
1644
1645                         /*
1646                          *      Find where to start putting bytes
1647                          */
1648                         data = skb_put(skb, fraglen - pagedlen);
1649                         skb_set_network_header(skb, exthdrlen);
1650                         data += fragheaderlen;
1651                         skb->transport_header = (skb->network_header +
1652                                                  fragheaderlen);
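                             /* The previous skb ran past maxfraglen; move the
                              * excess (fraggap) into this fragment and fix up
                              * both skbs' checksums.
                              */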
1653                         if (fraggap) {
1654                                 skb->csum = skb_copy_and_csum_bits(
1655                                         skb_prev, maxfraglen,
1656                                         data + transhdrlen, fraggap);
1657                                 skb_prev->csum = csum_sub(skb_prev->csum,
1658                                                           skb->csum);
1659                                 data += fraggap;
1660                                 pskb_trim_unique(skb_prev, maxfraglen);
1661                         }
1662                         if (copy > 0 &&
1663                             getfrag(from, data + transhdrlen, offset,
1664                                     copy, fraggap, skb) < 0) {
1665                                 err = -EFAULT;
1666                                 kfree_skb(skb);
1667                                 goto error;
1668                         }
1669
1670                         offset += copy;
1671                         length -= copy + transhdrlen;
1672                         transhdrlen = 0;
1673                         exthdrlen = 0;
1674                         dst_exthdrlen = 0;
1675
1676                         /* Only the initial fragment is time stamped */
1677                         skb_shinfo(skb)->tx_flags = cork->tx_flags;
1678                         cork->tx_flags = 0;
1679                         skb_shinfo(skb)->tskey = tskey;
1680                         tskey = 0;
1681                         skb_zcopy_set(skb, uarg, &extra_uref);
1682
1683                         if ((flags & MSG_CONFIRM) && !skb_prev)
1684                                 skb_set_dst_pending_confirm(skb, 1);
1685
1686                         /*
1687                          * Put the packet on the pending queue
1688                          */
1689                         if (!skb->destructor) {
1690                                 skb->destructor = sock_wfree;
1691                                 skb->sk = sk;
1692                                 wmem_alloc_delta += skb->truesize;
1693                         }
1694                         __skb_queue_tail(queue, skb);
1695                         continue;
1696                 }
1697
1698                 if (copy > length)
1699                         copy = length;
1700
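                     /* Three ways to append: copy into tail room (no S/G
                      * device), copy into a shared page fragment, or attach
                      * the user pages directly when zerocopy is in effect.
                      */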
1701                 if (!(rt->dst.dev->features&NETIF_F_SG) &&
1702                     skb_tailroom(skb) >= copy) {
1703                         unsigned int off;
1704
1705                         off = skb->len;
1706                         if (getfrag(from, skb_put(skb, copy),
1707                                                 offset, copy, off, skb) < 0) {
1708                                 __skb_trim(skb, off);
1709                                 err = -EFAULT;
1710                                 goto error;
1711                         }
1712                 } else if (!uarg || !uarg->zerocopy) {
1713                         int i = skb_shinfo(skb)->nr_frags;
1714
1715                         err = -ENOMEM;
1716                         if (!sk_page_frag_refill(sk, pfrag))
1717                                 goto error;
1718
1719                         if (!skb_can_coalesce(skb, i, pfrag->page,
1720                                               pfrag->offset)) {
1721                                 err = -EMSGSIZE;
1722                                 if (i == MAX_SKB_FRAGS)
1723                                         goto error;
1724
1725                                 __skb_fill_page_desc(skb, i, pfrag->page,
1726                                                      pfrag->offset, 0);
1727                                 skb_shinfo(skb)->nr_frags = ++i;
1728                                 get_page(pfrag->page);
1729                         }
1730                         copy = min_t(int, copy, pfrag->size - pfrag->offset);
1731                         if (getfrag(from,
1732                                     page_address(pfrag->page) + pfrag->offset,
1733                                     offset, copy, skb->len, skb) < 0)
1734                                 goto error_efault;
1735
1736                         pfrag->offset += copy;
1737                         skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1738                         skb->len += copy;
1739                         skb->data_len += copy;
1740                         skb->truesize += copy;
1741                         wmem_alloc_delta += copy;
1742                 } else {
1743                         err = skb_zerocopy_iter_dgram(skb, from, copy);
1744                         if (err < 0)
1745                                 goto error;
1746                 }
1747                 offset += copy;
1748                 length -= copy;
1749         }
1750
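             /* Charge everything that was queued to the socket's write-memory
              * accounting in one refcount_add() instead of per-skb updates.
              */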
1751         if (wmem_alloc_delta)
1752                 refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1753         return 0;
1754
1755 error_efault:
1756         err = -EFAULT;
1757 error:
1758         net_zcopy_put_abort(uarg, extra_uref);
1759         cork->length -= length;
1760         IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1761         refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
1762         return err;
1763 }
1764
1765 int ip6_append_data(struct sock *sk,
1766                     int getfrag(void *from, char *to, int offset, int len,
1767                                 int odd, struct sk_buff *skb),
1768                     void *from, int length, int transhdrlen,
1769                     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1770                     struct rt6_info *rt, unsigned int flags)
1771 {
1772         struct inet_sock *inet = inet_sk(sk);
1773         struct ipv6_pinfo *np = inet6_sk(sk);
1774         int exthdrlen;
1775         int err;
1776
1777         if (flags&MSG_PROBE)
1778                 return 0;
1779         if (skb_queue_empty(&sk->sk_write_queue)) {
1780                 /*
1781                  * set up for corking
1782                  */
1783                 err = ip6_setup_cork(sk, &inet->cork, &np->cork,
1784                                      ipc6, rt, fl6);
1785                 if (err)
1786                         return err;
1787
1788                 exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1789                 length += exthdrlen;
1790                 transhdrlen += exthdrlen;
1791         } else {
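                     /* A cork is already in progress: reuse its flow, and do
                      * not account the transport header again.
                      */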
1792                 fl6 = &inet->cork.fl.u.ip6;
1793                 transhdrlen = 0;
1794         }
1795
1796         return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
1797                                  &np->cork, sk_page_frag(sk), getfrag,
1798                                  from, length, transhdrlen, flags, ipc6);
1799 }
1800 EXPORT_SYMBOL_GPL(ip6_append_data);
1801
1802 static void ip6_cork_release(struct inet_cork_full *cork,
1803                              struct inet6_cork *v6_cork)
1804 {
1805         if (v6_cork->opt) {
1806                 kfree(v6_cork->opt->dst0opt);
1807                 kfree(v6_cork->opt->dst1opt);
1808                 kfree(v6_cork->opt->hopopt);
1809                 kfree(v6_cork->opt->srcrt);
1810                 kfree(v6_cork->opt);
1811                 v6_cork->opt = NULL;
1812         }
1813
1814         if (cork->base.dst) {
1815                 dst_release(cork->base.dst);
1816                 cork->base.dst = NULL;
1817                 cork->base.flags &= ~IPCORK_ALLFRAG;
1818         }
1819         memset(&cork->fl, 0, sizeof(cork->fl));
1820 }
1821
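     /*
      * __ip6_make_skb - join the queued fragments into one skb via
      * frag_list, push the extension headers and the IPv6 header, then
      * release the cork.
      */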
1822 struct sk_buff *__ip6_make_skb(struct sock *sk,
1823                                struct sk_buff_head *queue,
1824                                struct inet_cork_full *cork,
1825                                struct inet6_cork *v6_cork)
1826 {
1827         struct sk_buff *skb, *tmp_skb;
1828         struct sk_buff **tail_skb;
1829         struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1830         struct ipv6_pinfo *np = inet6_sk(sk);
1831         struct net *net = sock_net(sk);
1832         struct ipv6hdr *hdr;
1833         struct ipv6_txoptions *opt = v6_cork->opt;
1834         struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
1835         struct flowi6 *fl6 = &cork->fl.u.ip6;
1836         unsigned char proto = fl6->flowi6_proto;
1837
1838         skb = __skb_dequeue(queue);
1839         if (!skb)
1840                 goto out;
1841         tail_skb = &(skb_shinfo(skb)->frag_list);
1842
1843         /* move skb->data from the extension headers back to the IPv6 header */
1844         if (skb->data < skb_network_header(skb))
1845                 __skb_pull(skb, skb_network_offset(skb));
1846         while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
1847                 __skb_pull(tmp_skb, skb_network_header_len(skb));
1848                 *tail_skb = tmp_skb;
1849                 tail_skb = &(tmp_skb->next);
1850                 skb->len += tmp_skb->len;
1851                 skb->data_len += tmp_skb->len;
1852                 skb->truesize += tmp_skb->truesize;
1853                 tmp_skb->destructor = NULL;
1854                 tmp_skb->sk = NULL;
1855         }
1856
1857         /* Allow local fragmentation. */
1858         skb->ignore_df = ip6_sk_ignore_df(sk);
1859
1860         *final_dst = fl6->daddr;
1861         __skb_pull(skb, skb_network_header_len(skb));
1862         if (opt && opt->opt_flen)
1863                 ipv6_push_frag_opts(skb, opt, &proto);
1864         if (opt && opt->opt_nflen)
1865                 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);
1866
1867         skb_push(skb, sizeof(struct ipv6hdr));
1868         skb_reset_network_header(skb);
1869         hdr = ipv6_hdr(skb);
1870
1871         ip6_flow_hdr(hdr, v6_cork->tclass,
1872                      ip6_make_flowlabel(net, skb, fl6->flowlabel,
1873                                         ip6_autoflowlabel(net, np), fl6));
1874         hdr->hop_limit = v6_cork->hop_limit;
1875         hdr->nexthdr = proto;
1876         hdr->saddr = fl6->saddr;
1877         hdr->daddr = *final_dst;
1878
1879         skb->priority = sk->sk_priority;
1880         skb->mark = cork->base.mark;
1881
1882         skb->tstamp = cork->base.transmit_time;
1883
1884         skb_dst_set(skb, dst_clone(&rt->dst));
1885         IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1886         if (proto == IPPROTO_ICMPV6) {
1887                 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1888
1889                 ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
1890                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
1891         }
1892
1893         ip6_cork_release(cork, v6_cork);
1894 out:
1895         return skb;
1896 }
1897
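     /* Hand a finished skb to ip6_local_out(), mapping positive qdisc return
      * codes to errno values and counting failures as output discards.
      */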
1898 int ip6_send_skb(struct sk_buff *skb)
1899 {
1900         struct net *net = sock_net(skb->sk);
1901         struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
1902         int err;
1903
1904         err = ip6_local_out(net, skb->sk, skb);
1905         if (err) {
1906                 if (err > 0)
1907                         err = net_xmit_errno(err);
1908                 if (err)
1909                         IP6_INC_STATS(net, rt->rt6i_idev,
1910                                       IPSTATS_MIB_OUTDISCARDS);
1911         }
1912
1913         return err;
1914 }
1915
1916 int ip6_push_pending_frames(struct sock *sk)
1917 {
1918         struct sk_buff *skb;
1919
1920         skb = ip6_finish_skb(sk);
1921         if (!skb)
1922                 return 0;
1923
1924         return ip6_send_skb(skb);
1925 }
1926 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1927
1928 static void __ip6_flush_pending_frames(struct sock *sk,
1929                                        struct sk_buff_head *queue,
1930                                        struct inet_cork_full *cork,
1931                                        struct inet6_cork *v6_cork)
1932 {
1933         struct sk_buff *skb;
1934
1935         while ((skb = __skb_dequeue_tail(queue)) != NULL) {
1936                 if (skb_dst(skb))
1937                         IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1938                                       IPSTATS_MIB_OUTDISCARDS);
1939                 kfree_skb(skb);
1940         }
1941
1942         ip6_cork_release(cork, v6_cork);
1943 }
1944
1945 void ip6_flush_pending_frames(struct sock *sk)
1946 {
1947         __ip6_flush_pending_frames(sk, &sk->sk_write_queue,
1948                                    &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
1949 }
1950 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
1951
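     /*
      * ip6_make_skb - build a complete datagram in one shot on a private
      * queue, without leaving corked state on sk->sk_write_queue.
      */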
1952 struct sk_buff *ip6_make_skb(struct sock *sk,
1953                              int getfrag(void *from, char *to, int offset,
1954                                          int len, int odd, struct sk_buff *skb),
1955                              void *from, int length, int transhdrlen,
1956                              struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
1957                              struct rt6_info *rt, unsigned int flags,
1958                              struct inet_cork_full *cork)
1959 {
1960         struct inet6_cork v6_cork;
1961         struct sk_buff_head queue;
1962         int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
1963         int err;
1964
1965         if (flags & MSG_PROBE)
1966                 return NULL;
1967
1968         __skb_queue_head_init(&queue);
1969
1970         cork->base.flags = 0;
1971         cork->base.addr = 0;
1972         cork->base.opt = NULL;
1973         cork->base.dst = NULL;
1974         v6_cork.opt = NULL;
1975         err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
1976         if (err) {
1977                 ip6_cork_release(cork, &v6_cork);
1978                 return ERR_PTR(err);
1979         }
1980         if (ipc6->dontfrag < 0)
1981                 ipc6->dontfrag = inet6_sk(sk)->dontfrag;
1982
1983         err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
1984                                 &current->task_frag, getfrag, from,
1985                                 length + exthdrlen, transhdrlen + exthdrlen,
1986                                 flags, ipc6);
1987         if (err) {
1988                 __ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
1989                 return ERR_PTR(err);
1990         }
1991
1992         return __ip6_make_skb(sk, &queue, cork, &v6_cork);
1993 }