[platform/kernel/linux-starfive.git] / net / ipv6 / icmp.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  *      Internet Control Message Protocol (ICMPv6)
4  *      Linux INET6 implementation
5  *
6  *      Authors:
7  *      Pedro Roque             <roque@di.fc.ul.pt>
8  *
9  *      Based on net/ipv4/icmp.c
10  *
11  *      RFC 1885
12  */
13
14 /*
15  *      Changes:
16  *
17  *      Andi Kleen              :       exception handling
18  *      Andi Kleen                      add rate limits; never reply to an icmp.
19  *                                      add more length checks and other fixes.
20  *      yoshfuji                :       ensure to send parameter problem for
21  *                                      fragments.
22  *      YOSHIFUJI Hideaki @USAGI:       added sysctl for icmp rate limit.
23  *      Randy Dunlap and
24  *      YOSHIFUJI Hideaki @USAGI:       Per-interface statistics support
25  *      Kazunori MIYAZAWA @USAGI:       change output process to use ip6_append_data
26  */
27
28 #define pr_fmt(fmt) "IPv6: " fmt
29
30 #include <linux/module.h>
31 #include <linux/errno.h>
32 #include <linux/types.h>
33 #include <linux/socket.h>
34 #include <linux/in.h>
35 #include <linux/kernel.h>
36 #include <linux/sockios.h>
37 #include <linux/net.h>
38 #include <linux/skbuff.h>
39 #include <linux/init.h>
40 #include <linux/netfilter.h>
41 #include <linux/slab.h>
42
43 #ifdef CONFIG_SYSCTL
44 #include <linux/sysctl.h>
45 #endif
46
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/icmpv6.h>
50
51 #include <net/ip.h>
52 #include <net/sock.h>
53
54 #include <net/ipv6.h>
55 #include <net/ip6_checksum.h>
56 #include <net/ping.h>
57 #include <net/protocol.h>
58 #include <net/raw.h>
59 #include <net/rawv6.h>
60 #include <net/seg6.h>
61 #include <net/transp_v6.h>
62 #include <net/ip6_route.h>
63 #include <net/addrconf.h>
64 #include <net/icmp.h>
65 #include <net/xfrm.h>
66 #include <net/inet_common.h>
67 #include <net/dsfield.h>
68 #include <net/l3mdev.h>
69
70 #include <linux/uaccess.h>
71
72 static DEFINE_PER_CPU(struct sock *, ipv6_icmp_sk);
73
74 static int icmpv6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
75                        u8 type, u8 code, int offset, __be32 info)
76 {
77         /* icmpv6_notify checks 8 bytes can be pulled, icmp6hdr is 8 bytes */
78         struct icmp6hdr *icmp6 = (struct icmp6hdr *) (skb->data + offset);
79         struct net *net = dev_net(skb->dev);
80
81         if (type == ICMPV6_PKT_TOOBIG)
82                 ip6_update_pmtu(skb, net, info, skb->dev->ifindex, 0, sock_net_uid(net, NULL));
83         else if (type == NDISC_REDIRECT)
84                 ip6_redirect(skb, net, skb->dev->ifindex, 0,
85                              sock_net_uid(net, NULL));
86
87         if (!(type & ICMPV6_INFOMSG_MASK))
88                 if (icmp6->icmp6_type == ICMPV6_ECHO_REQUEST)
89                         ping_err(skb, offset, ntohl(info));
90
91         return 0;
92 }
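/*
 * icmpv6_err() is the err_handler run when an ICMPv6 error arrives whose
 * embedded (offending) packet was itself ICMPv6.  For example, a Packet
 * Too Big received in response to a locally generated echo request makes
 * ip6_update_pmtu() cache ntohl(info) as the new path MTU, NDISC_REDIRECT
 * updates the route via ip6_redirect(), and errors quoting an echo request
 * are handed to ping sockets through ping_err().
 */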
93
94 static int icmpv6_rcv(struct sk_buff *skb);
95
96 static const struct inet6_protocol icmpv6_protocol = {
97         .handler        =       icmpv6_rcv,
98         .err_handler    =       icmpv6_err,
99         .flags          =       INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
100 };
101
102 /* Called with BH disabled */
103 static struct sock *icmpv6_xmit_lock(struct net *net)
104 {
105         struct sock *sk;
106
107         sk = this_cpu_read(ipv6_icmp_sk);
108         if (unlikely(!spin_trylock(&sk->sk_lock.slock))) {
109                 /* This can happen if the output path (e.g. a SIT or
110                  * ip6ip6 tunnel) signals dst_link_failure() for an
111                  * outgoing ICMP6 packet.
112                  */
113                 return NULL;
114         }
115         sock_net_set(sk, net);
116         return sk;
117 }
118
119 static void icmpv6_xmit_unlock(struct sock *sk)
120 {
121         sock_net_set(sk, &init_net);
122         spin_unlock(&sk->sk_lock.slock);
123 }
124
125 /*
126  * Figure out whether we may reply to this packet with an icmp error.
127  *
128  * We do not reply if:
129  *      - it was an icmp error message.
130  *      - it is truncated, so that it is known that the protocol is ICMPV6
131  *        (i.e. it is truncated in the middle of some exthdr)
132  *
133  *      --ANK (980726)
134  */
135
136 static bool is_ineligible(const struct sk_buff *skb)
137 {
138         int ptr = (u8 *)(ipv6_hdr(skb) + 1) - skb->data;
139         int len = skb->len - ptr;
140         __u8 nexthdr = ipv6_hdr(skb)->nexthdr;
141         __be16 frag_off;
142
143         if (len < 0)
144                 return true;
145
146         ptr = ipv6_skip_exthdr(skb, ptr, &nexthdr, &frag_off);
147         if (ptr < 0)
148                 return false;
149         if (nexthdr == IPPROTO_ICMPV6) {
150                 u8 _type, *tp;
151                 tp = skb_header_pointer(skb,
152                         ptr+offsetof(struct icmp6hdr, icmp6_type),
153                         sizeof(_type), &_type);
154
155                 /* Based on RFC 8200, Section 4.5 Fragment Header, return
156                  * false if this is a fragment packet with no icmp header info.
157                  */
158                 if (!tp && frag_off != 0)
159                         return false;
160                 else if (!tp || !(*tp & ICMPV6_INFOMSG_MASK))
161                         return true;
162         }
163         return false;
164 }
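/*
 * Example of the rule above (RFC 4443 sect. 2.4 (e.1)): an ICMPv6 error
 * must never be generated in response to a packet that is itself an ICMPv6
 * error message, otherwise two nodes could keep exchanging errors about
 * each other's errors indefinitely.  is_ineligible() is how icmp6_send()
 * enforces that.
 */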
165
166 static bool icmpv6_mask_allow(struct net *net, int type)
167 {
168         if (type > ICMPV6_MSG_MAX)
169                 return true;
170
171         /* Rate limit only if the icmp type is set in the ratemask. */
172         if (!test_bit(type, net->ipv6.sysctl.icmpv6_ratemask))
173                 return true;
174
175         return false;
176 }
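/*
 * The ratemask is a per-netns bitmap edited through proc_do_large_bitmap()
 * (see the "ratemask" entry in the sysctl table at the bottom of this
 * file), typically exposed as /proc/sys/net/ipv6/icmp/ratemask and written
 * as a comma separated list of ranges.  An illustrative setting that rate
 * limits every error type except Packet Too Big (type 2), while leaving
 * informational types such as echo alone, would be:
 *
 *     echo "0-1,3-127" > /proc/sys/net/ipv6/icmp/ratemask
 *
 * Types whose bit is clear, or above ICMPV6_MSG_MAX, bypass rate limiting.
 */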
177
178 static bool icmpv6_global_allow(struct net *net, int type)
179 {
180         if (icmpv6_mask_allow(net, type))
181                 return true;
182
183         if (icmp_global_allow())
184                 return true;
185
186         __ICMP_INC_STATS(net, ICMP_MIB_RATELIMITGLOBAL);
187         return false;
188 }
189
190 /*
191  * Check the ICMP output rate limit
192  */
193 static bool icmpv6_xrlim_allow(struct sock *sk, u8 type,
194                                struct flowi6 *fl6)
195 {
196         struct net *net = sock_net(sk);
197         struct dst_entry *dst;
198         bool res = false;
199
200         if (icmpv6_mask_allow(net, type))
201                 return true;
202
203         /*
204          * Look up the output route.
205          * XXX: perhaps the expire for routing entries cloned by
206          * this lookup should be more aggressive (not longer than timeout).
207          */
208         dst = ip6_route_output(net, sk, fl6);
209         if (dst->error) {
210                 IP6_INC_STATS(net, ip6_dst_idev(dst),
211                               IPSTATS_MIB_OUTNOROUTES);
212         } else if (dst->dev && (dst->dev->flags&IFF_LOOPBACK)) {
213                 res = true;
214         } else {
215                 struct rt6_info *rt = (struct rt6_info *)dst;
216                 int tmo = net->ipv6.sysctl.icmpv6_time;
217                 struct inet_peer *peer;
218
219                 /* Give more bandwidth to wider prefixes. */
220                 if (rt->rt6i_dst.plen < 128)
221                         tmo >>= ((128 - rt->rt6i_dst.plen)>>5);
222
223                 peer = inet_getpeer_v6(net->ipv6.peers, &fl6->daddr, 1);
224                 res = inet_peer_xrlim_allow(peer, tmo);
225                 if (peer)
226                         inet_putpeer(peer);
227         }
228         if (!res)
229                 __ICMP6_INC_STATS(net, ip6_dst_idev(dst),
230                                   ICMP6_MIB_RATELIMITHOST);
231         dst_release(dst);
232         return res;
233 }
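/*
 * Worked example of the "wider prefix" shift above, assuming the common
 * 1000 ms default for icmpv6_time (the "ratelimit" sysctl):
 *
 *   plen == 128 (host route):    no shift, 1000 ms between errors per peer
 *   plen ==  64: (128-64)>>5 == 2, so tmo >>= 2, i.e. 250 ms
 *   plen ==   0 (default route): 128>>5 == 4, so tmo >>= 4, i.e. 62 ms
 *
 * inet_peer_xrlim_allow() then applies this interval as a per-destination
 * token bucket.
 */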
234
235 static bool icmpv6_rt_has_prefsrc(struct sock *sk, u8 type,
236                                   struct flowi6 *fl6)
237 {
238         struct net *net = sock_net(sk);
239         struct dst_entry *dst;
240         bool res = false;
241
242         dst = ip6_route_output(net, sk, fl6);
243         if (!dst->error) {
244                 struct rt6_info *rt = (struct rt6_info *)dst;
245                 struct in6_addr prefsrc;
246
247                 rt6_get_prefsrc(rt, &prefsrc);
248                 res = !ipv6_addr_any(&prefsrc);
249         }
250         dst_release(dst);
251         return res;
252 }
253
254 /*
255  *      an inline helper for the "simple" if statement below;
256  *      checks whether a parameter problem report is caused by an
257  *      unrecognized IPv6 option whose Option Type has its
258  *      highest-order two bits set to 10
259  */
260
261 static bool opt_unrec(struct sk_buff *skb, __u32 offset)
262 {
263         u8 _optval, *op;
264
265         offset += skb_network_offset(skb);
266         op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval);
267         if (!op)
268                 return true;
269         return (*op & 0xC0) == 0x80;
270 }
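/*
 * The (*op & 0xC0) == 0x80 test above matches Option Types 0x80-0xBF,
 * i.e. the "10" action bits from RFC 8200 sect. 4.2: discard the packet
 * and send a Parameter Problem even if the destination was multicast.
 * The other action values (00 skip, 01 discard silently, 11 discard and
 * report only for non-multicast destinations) do not take this path.
 */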
271
272 void icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6,
273                                 struct icmp6hdr *thdr, int len)
274 {
275         struct sk_buff *skb;
276         struct icmp6hdr *icmp6h;
277
278         skb = skb_peek(&sk->sk_write_queue);
279         if (!skb)
280                 return;
281
282         icmp6h = icmp6_hdr(skb);
283         memcpy(icmp6h, thdr, sizeof(struct icmp6hdr));
284         icmp6h->icmp6_cksum = 0;
285
286         if (skb_queue_len(&sk->sk_write_queue) == 1) {
287                 skb->csum = csum_partial(icmp6h,
288                                         sizeof(struct icmp6hdr), skb->csum);
289                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
290                                                       &fl6->daddr,
291                                                       len, fl6->flowi6_proto,
292                                                       skb->csum);
293         } else {
294                 __wsum tmp_csum = 0;
295
296                 skb_queue_walk(&sk->sk_write_queue, skb) {
297                         tmp_csum = csum_add(tmp_csum, skb->csum);
298                 }
299
300                 tmp_csum = csum_partial(icmp6h,
301                                         sizeof(struct icmp6hdr), tmp_csum);
302                 icmp6h->icmp6_cksum = csum_ipv6_magic(&fl6->saddr,
303                                                       &fl6->daddr,
304                                                       len, fl6->flowi6_proto,
305                                                       tmp_csum);
306         }
307         ip6_push_pending_frames(sk);
308 }
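/*
 * Both branches above finish the mandatory ICMPv6 checksum: csum_partial()
 * covers the ICMPv6 header and payload, and csum_ipv6_magic() folds in the
 * pseudo-header (source address, destination address, upper-layer length
 * and the next-header value 58) required by RFC 4443 sect. 2.3 and
 * RFC 8200 sect. 8.1.
 */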
309
310 struct icmpv6_msg {
311         struct sk_buff  *skb;
312         int             offset;
313         uint8_t         type;
314 };
315
316 static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb)
317 {
318         struct icmpv6_msg *msg = (struct icmpv6_msg *) from;
319         struct sk_buff *org_skb = msg->skb;
320         __wsum csum;
321
322         csum = skb_copy_and_csum_bits(org_skb, msg->offset + offset,
323                                       to, len);
324         skb->csum = csum_block_add(skb->csum, csum, odd);
325         if (!(msg->type & ICMPV6_INFOMSG_MASK))
326                 nf_ct_attach(skb, org_skb);
327         return 0;
328 }
329
330 #if IS_ENABLED(CONFIG_IPV6_MIP6)
331 static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
332 {
333         struct ipv6hdr *iph = ipv6_hdr(skb);
334         struct ipv6_destopt_hao *hao;
335         int off;
336
337         if (opt->dsthao) {
338                 off = ipv6_find_tlv(skb, opt->dsthao, IPV6_TLV_HAO);
339                 if (likely(off >= 0)) {
340                         hao = (struct ipv6_destopt_hao *)
341                                         (skb_network_header(skb) + off);
342                         swap(iph->saddr, hao->addr);
343                 }
344         }
345 }
346 #else
347 static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
348 #endif
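/*
 * Mobile IPv6 (RFC 6275): when the offending packet carried a Home Address
 * destination option, swapping iph->saddr with the HAO address before the
 * error is built keeps the error's destination and the quoted packet
 * consistent with the mobile node's home/care-of address pair; the option
 * itself is handled by the MIP6 code in net/ipv6/mip6.c.
 */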
349
350 static struct dst_entry *icmpv6_route_lookup(struct net *net,
351                                              struct sk_buff *skb,
352                                              struct sock *sk,
353                                              struct flowi6 *fl6)
354 {
355         struct dst_entry *dst, *dst2;
356         struct flowi6 fl2;
357         int err;
358
359         err = ip6_dst_lookup(net, sk, &dst, fl6);
360         if (err)
361                 return ERR_PTR(err);
362
363         /*
364          * We won't send icmp if the destination is known
365          * anycast unless we need to treat anycast as unicast.
366          */
367         if (!READ_ONCE(net->ipv6.sysctl.icmpv6_error_anycast_as_unicast) &&
368             ipv6_anycast_destination(dst, &fl6->daddr)) {
369                 net_dbg_ratelimited("icmp6_send: acast source\n");
370                 dst_release(dst);
371                 return ERR_PTR(-EINVAL);
372         }
373
374         /* No need to clone since we're just using its address. */
375         dst2 = dst;
376
377         dst = xfrm_lookup(net, dst, flowi6_to_flowi(fl6), sk, 0);
378         if (!IS_ERR(dst)) {
379                 if (dst != dst2)
380                         return dst;
381         } else {
382                 if (PTR_ERR(dst) == -EPERM)
383                         dst = NULL;
384                 else
385                         return dst;
386         }
387
388         err = xfrm_decode_session_reverse(skb, flowi6_to_flowi(&fl2), AF_INET6);
389         if (err)
390                 goto relookup_failed;
391
392         err = ip6_dst_lookup(net, sk, &dst2, &fl2);
393         if (err)
394                 goto relookup_failed;
395
396         dst2 = xfrm_lookup(net, dst2, flowi6_to_flowi(&fl2), sk, XFRM_LOOKUP_ICMP);
397         if (!IS_ERR(dst2)) {
398                 dst_release(dst);
399                 dst = dst2;
400         } else {
401                 err = PTR_ERR(dst2);
402                 if (err == -EPERM) {
403                         dst_release(dst);
404                         return dst2;
405                 } else
406                         goto relookup_failed;
407         }
408
409 relookup_failed:
410         if (dst)
411                 return dst;
412         return ERR_PTR(err);
413 }
414
415 static struct net_device *icmp6_dev(const struct sk_buff *skb)
416 {
417         struct net_device *dev = skb->dev;
418
419         /* For local traffic to a local address, the skb dev is the loopback
420          * device. Check if there is a dst attached to the skb and, if so,
421          * get the real device index. The same is needed for replies to a
422          * link-local address on a device enslaved to an L3 master device.
423          */
424         if (unlikely(dev->ifindex == LOOPBACK_IFINDEX || netif_is_l3_master(skb->dev))) {
425                 const struct rt6_info *rt6 = skb_rt6_info(skb);
426
427                 /* The destination could be an external IP in Ext Hdr (SRv6, RPL, etc.),
428                  * and ip6_null_entry could be attached to the skb if no route is found.
429                  */
430                 if (rt6 && rt6->rt6i_idev)
431                         dev = rt6->rt6i_idev->dev;
432         }
433
434         return dev;
435 }
436
437 static int icmp6_iif(const struct sk_buff *skb)
438 {
439         return icmp6_dev(skb)->ifindex;
440 }
441
442 /*
443  *      Send an ICMP message in response to a packet in error
444  */
445 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
446                 const struct in6_addr *force_saddr,
447                 const struct inet6_skb_parm *parm)
448 {
449         struct inet6_dev *idev = NULL;
450         struct ipv6hdr *hdr = ipv6_hdr(skb);
451         struct sock *sk;
452         struct net *net;
453         struct ipv6_pinfo *np;
454         const struct in6_addr *saddr = NULL;
455         struct dst_entry *dst;
456         struct icmp6hdr tmp_hdr;
457         struct flowi6 fl6;
458         struct icmpv6_msg msg;
459         struct ipcm6_cookie ipc6;
460         int iif = 0;
461         int addr_type = 0;
462         int len;
463         u32 mark;
464
465         if ((u8 *)hdr < skb->head ||
466             (skb_network_header(skb) + sizeof(*hdr)) > skb_tail_pointer(skb))
467                 return;
468
469         if (!skb->dev)
470                 return;
471         net = dev_net(skb->dev);
472         mark = IP6_REPLY_MARK(net, skb->mark);
473         /*
474          *      Make sure we respect the rules,
475          *      i.e. RFC 1885 2.4(e).
476          *      Rule (e.1) is enforced by not using icmp6_send
477          *      in any code that processes icmp errors.
478          */
479         addr_type = ipv6_addr_type(&hdr->daddr);
480
481         if (ipv6_chk_addr(net, &hdr->daddr, skb->dev, 0) ||
482             ipv6_chk_acast_addr_src(net, skb->dev, &hdr->daddr))
483                 saddr = &hdr->daddr;
484
485         /*
486          *      Dest addr check
487          */
488
489         if (addr_type & IPV6_ADDR_MULTICAST || skb->pkt_type != PACKET_HOST) {
490                 if (type != ICMPV6_PKT_TOOBIG &&
491                     !(type == ICMPV6_PARAMPROB &&
492                       code == ICMPV6_UNK_OPTION &&
493                       (opt_unrec(skb, info))))
494                         return;
495
496                 saddr = NULL;
497         }
498
499         addr_type = ipv6_addr_type(&hdr->saddr);
500
501         /*
502          *      Source addr check
503          */
504
505         if (__ipv6_addr_needs_scope_id(addr_type)) {
506                 iif = icmp6_iif(skb);
507         } else {
508                 /*
509                  * The source device is used for looking up which routing table
510                  * to use for sending an ICMP error.
511                  */
512                 iif = l3mdev_master_ifindex(skb->dev);
513         }
514
515         /*
516          *      Must not send error if the source does not uniquely
517          *      identify a single node (RFC2463 Section 2.4).
518          *      We check unspecified / multicast addresses here,
519          *      and anycast addresses will be checked later.
520          */
521         if ((addr_type == IPV6_ADDR_ANY) || (addr_type & IPV6_ADDR_MULTICAST)) {
522                 net_dbg_ratelimited("icmp6_send: addr_any/mcast source [%pI6c > %pI6c]\n",
523                                     &hdr->saddr, &hdr->daddr);
524                 return;
525         }
526
527         /*
528          *      Never answer an ICMP error message.
529          */
530         if (is_ineligible(skb)) {
531                 net_dbg_ratelimited("icmp6_send: no reply to icmp error [%pI6c > %pI6c]\n",
532                                     &hdr->saddr, &hdr->daddr);
533                 return;
534         }
535
536         /* Needed by both icmp_global_allow and icmpv6_xmit_lock */
537         local_bh_disable();
538
539         /* Check global sysctl_icmp_msgs_per_sec ratelimit */
540         if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
541                 goto out_bh_enable;
542
543         mip6_addr_swap(skb, parm);
544
545         sk = icmpv6_xmit_lock(net);
546         if (!sk)
547                 goto out_bh_enable;
548
549         memset(&fl6, 0, sizeof(fl6));
550         fl6.flowi6_proto = IPPROTO_ICMPV6;
551         fl6.daddr = hdr->saddr;
552         if (force_saddr)
553                 saddr = force_saddr;
554         if (saddr) {
555                 fl6.saddr = *saddr;
556         } else if (!icmpv6_rt_has_prefsrc(sk, type, &fl6)) {
557                 /* select a more meaningful saddr from the input interface */
558                 struct net_device *in_netdev;
559
560                 in_netdev = dev_get_by_index(net, parm->iif);
561                 if (in_netdev) {
562                         ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
563                                            inet6_sk(sk)->srcprefs,
564                                            &fl6.saddr);
565                         dev_put(in_netdev);
566                 }
567         }
568         fl6.flowi6_mark = mark;
569         fl6.flowi6_oif = iif;
570         fl6.fl6_icmp_type = type;
571         fl6.fl6_icmp_code = code;
572         fl6.flowi6_uid = sock_net_uid(net, NULL);
573         fl6.mp_hash = rt6_multipath_hash(net, &fl6, skb, NULL);
574         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
575
576         np = inet6_sk(sk);
577
578         if (!icmpv6_xrlim_allow(sk, type, &fl6))
579                 goto out;
580
581         tmp_hdr.icmp6_type = type;
582         tmp_hdr.icmp6_code = code;
583         tmp_hdr.icmp6_cksum = 0;
584         tmp_hdr.icmp6_pointer = htonl(info);
585
586         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
587                 fl6.flowi6_oif = np->mcast_oif;
588         else if (!fl6.flowi6_oif)
589                 fl6.flowi6_oif = np->ucast_oif;
590
591         ipcm6_init_sk(&ipc6, np);
592         ipc6.sockc.mark = mark;
593         fl6.flowlabel = ip6_make_flowinfo(ipc6.tclass, fl6.flowlabel);
594
595         dst = icmpv6_route_lookup(net, skb, sk, &fl6);
596         if (IS_ERR(dst))
597                 goto out;
598
599         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
600
601         msg.skb = skb;
602         msg.offset = skb_network_offset(skb);
603         msg.type = type;
604
605         len = skb->len - msg.offset;
606         len = min_t(unsigned int, len, IPV6_MIN_MTU - sizeof(struct ipv6hdr) - sizeof(struct icmp6hdr));
607         if (len < 0) {
608                 net_dbg_ratelimited("icmp: len problem [%pI6c > %pI6c]\n",
609                                     &hdr->saddr, &hdr->daddr);
610                 goto out_dst_release;
611         }
612
613         rcu_read_lock();
614         idev = __in6_dev_get(skb->dev);
615
616         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
617                             len + sizeof(struct icmp6hdr),
618                             sizeof(struct icmp6hdr),
619                             &ipc6, &fl6, (struct rt6_info *)dst,
620                             MSG_DONTWAIT)) {
621                 ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
622                 ip6_flush_pending_frames(sk);
623         } else {
624                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
625                                            len + sizeof(struct icmp6hdr));
626         }
627         rcu_read_unlock();
628 out_dst_release:
629         dst_release(dst);
630 out:
631         icmpv6_xmit_unlock(sk);
632 out_bh_enable:
633         local_bh_enable();
634 }
635 EXPORT_SYMBOL(icmp6_send);
636
637 /* Slightly more convenient version of icmp6_send with drop reasons.
638  */
639 void icmpv6_param_prob_reason(struct sk_buff *skb, u8 code, int pos,
640                               enum skb_drop_reason reason)
641 {
642         icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
643         kfree_skb_reason(skb, reason);
644 }
645
646 /* Generate icmpv6 with type/code ICMPV6_DEST_UNREACH/ICMPV6_ADDR_UNREACH
647  * if sufficient data bytes are available.
648  * @nhs is the size of the tunnel header(s):
649  *  either an IPv4 header for SIT encap, or
650  *  an IPv4 header + GRE header for GRE encap.
651  */
652 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
653                                unsigned int data_len)
654 {
655         struct in6_addr temp_saddr;
656         struct rt6_info *rt;
657         struct sk_buff *skb2;
658         u32 info = 0;
659
660         if (!pskb_may_pull(skb, nhs + sizeof(struct ipv6hdr) + 8))
661                 return 1;
662
663         /* RFC 4884 (partial) support for ICMP extensions */
664         if (data_len < 128 || (data_len & 7) || skb->len < data_len)
665                 data_len = 0;
666
667         skb2 = data_len ? skb_copy(skb, GFP_ATOMIC) : skb_clone(skb, GFP_ATOMIC);
668
669         if (!skb2)
670                 return 1;
671
672         skb_dst_drop(skb2);
673         skb_pull(skb2, nhs);
674         skb_reset_network_header(skb2);
675
676         rt = rt6_lookup(dev_net(skb->dev), &ipv6_hdr(skb2)->saddr, NULL, 0,
677                         skb, 0);
678
679         if (rt && rt->dst.dev)
680                 skb2->dev = rt->dst.dev;
681
682         ipv6_addr_set_v4mapped(ip_hdr(skb)->saddr, &temp_saddr);
683
684         if (data_len) {
685                 /* RFC 4884 (partial) support :
686                  * insert 0 padding at the end, before the extensions
687                  */
688                 __skb_push(skb2, nhs);
689                 skb_reset_network_header(skb2);
690                 memmove(skb2->data, skb2->data + nhs, data_len - nhs);
691                 memset(skb2->data + data_len - nhs, 0, nhs);
692                 /* RFC 4884 4.5 : Length is measured in 64-bit words,
693                  * and stored in reserved[0]
694                  */
695                 info = (data_len/8) << 24;
696         }
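        /* Worked example of the RFC 4884 length encoding above: with
         * data_len == 128 (the minimum accepted earlier), the stored value
         * is (128/8) << 24 == 16 << 24, so after htonl() the byte 16 lands
         * in reserved[0] of the ICMPv6 header, telling the receiver that
         * 16 * 8 == 128 bytes of the original datagram precede any
         * extension structure.
         */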
697         if (type == ICMP_TIME_EXCEEDED)
698                 icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
699                            info, &temp_saddr, IP6CB(skb2));
700         else
701                 icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
702                            info, &temp_saddr, IP6CB(skb2));
703         if (rt)
704                 ip6_rt_put(rt);
705
706         kfree_skb(skb2);
707
708         return 0;
709 }
710 EXPORT_SYMBOL(ip6_err_gen_icmpv6_unreach);
711
712 static enum skb_drop_reason icmpv6_echo_reply(struct sk_buff *skb)
713 {
714         struct net *net = dev_net(skb->dev);
715         struct sock *sk;
716         struct inet6_dev *idev;
717         struct ipv6_pinfo *np;
718         const struct in6_addr *saddr = NULL;
719         struct icmp6hdr *icmph = icmp6_hdr(skb);
720         struct icmp6hdr tmp_hdr;
721         struct flowi6 fl6;
722         struct icmpv6_msg msg;
723         struct dst_entry *dst;
724         struct ipcm6_cookie ipc6;
725         u32 mark = IP6_REPLY_MARK(net, skb->mark);
726         SKB_DR(reason);
727         bool acast;
728         u8 type;
729
730         if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr) &&
731             net->ipv6.sysctl.icmpv6_echo_ignore_multicast)
732                 return reason;
733
734         saddr = &ipv6_hdr(skb)->daddr;
735
736         acast = ipv6_anycast_destination(skb_dst(skb), saddr);
737         if (acast && net->ipv6.sysctl.icmpv6_echo_ignore_anycast)
738                 return reason;
739
740         if (!ipv6_unicast_destination(skb) &&
741             !(net->ipv6.sysctl.anycast_src_echo_reply && acast))
742                 saddr = NULL;
743
744         if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
745                 type = ICMPV6_EXT_ECHO_REPLY;
746         else
747                 type = ICMPV6_ECHO_REPLY;
748
749         memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr));
750         tmp_hdr.icmp6_type = type;
751
752         memset(&fl6, 0, sizeof(fl6));
753         if (net->ipv6.sysctl.flowlabel_reflect & FLOWLABEL_REFLECT_ICMPV6_ECHO_REPLIES)
754                 fl6.flowlabel = ip6_flowlabel(ipv6_hdr(skb));
755
756         fl6.flowi6_proto = IPPROTO_ICMPV6;
757         fl6.daddr = ipv6_hdr(skb)->saddr;
758         if (saddr)
759                 fl6.saddr = *saddr;
760         fl6.flowi6_oif = icmp6_iif(skb);
761         fl6.fl6_icmp_type = type;
762         fl6.flowi6_mark = mark;
763         fl6.flowi6_uid = sock_net_uid(net, NULL);
764         security_skb_classify_flow(skb, flowi6_to_flowi_common(&fl6));
765
766         local_bh_disable();
767         sk = icmpv6_xmit_lock(net);
768         if (!sk)
769                 goto out_bh_enable;
770         np = inet6_sk(sk);
771
772         if (!fl6.flowi6_oif && ipv6_addr_is_multicast(&fl6.daddr))
773                 fl6.flowi6_oif = np->mcast_oif;
774         else if (!fl6.flowi6_oif)
775                 fl6.flowi6_oif = np->ucast_oif;
776
777         if (ip6_dst_lookup(net, sk, &dst, &fl6))
778                 goto out;
779         dst = xfrm_lookup(net, dst, flowi6_to_flowi(&fl6), sk, 0);
780         if (IS_ERR(dst))
781                 goto out;
782
783         /* Check the ratelimit */
784         if ((!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, ICMPV6_ECHO_REPLY)) ||
785             !icmpv6_xrlim_allow(sk, ICMPV6_ECHO_REPLY, &fl6))
786                 goto out_dst_release;
787
788         idev = __in6_dev_get(skb->dev);
789
790         msg.skb = skb;
791         msg.offset = 0;
792         msg.type = type;
793
794         ipcm6_init_sk(&ipc6, np);
795         ipc6.hlimit = ip6_sk_dst_hoplimit(np, &fl6, dst);
796         ipc6.tclass = ipv6_get_dsfield(ipv6_hdr(skb));
797         ipc6.sockc.mark = mark;
798
799         if (icmph->icmp6_type == ICMPV6_EXT_ECHO_REQUEST)
800                 if (!icmp_build_probe(skb, (struct icmphdr *)&tmp_hdr))
801                         goto out_dst_release;
802
803         if (ip6_append_data(sk, icmpv6_getfrag, &msg,
804                             skb->len + sizeof(struct icmp6hdr),
805                             sizeof(struct icmp6hdr), &ipc6, &fl6,
806                             (struct rt6_info *)dst, MSG_DONTWAIT)) {
807                 __ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTERRORS);
808                 ip6_flush_pending_frames(sk);
809         } else {
810                 icmpv6_push_pending_frames(sk, &fl6, &tmp_hdr,
811                                            skb->len + sizeof(struct icmp6hdr));
812                 reason = SKB_CONSUMED;
813         }
814 out_dst_release:
815         dst_release(dst);
816 out:
817         icmpv6_xmit_unlock(sk);
818 out_bh_enable:
819         local_bh_enable();
820         return reason;
821 }
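/*
 * Echo replies honour several knobs from the sysctl table at the bottom of
 * this file: echo_ignore_multicast and echo_ignore_anycast are checked
 * above, echo_ignore_all is checked by the caller (icmpv6_rcv), and the
 * ratelimit/ratemask pair applies through icmpv6_global_allow() and
 * icmpv6_xrlim_allow(), so even replies to legitimate pings can be rate
 * limited unless ICMPV6_ECHO_REPLY is excluded from the ratemask.
 */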
822
823 enum skb_drop_reason icmpv6_notify(struct sk_buff *skb, u8 type,
824                                    u8 code, __be32 info)
825 {
826         struct inet6_skb_parm *opt = IP6CB(skb);
827         struct net *net = dev_net(skb->dev);
828         const struct inet6_protocol *ipprot;
829         enum skb_drop_reason reason;
830         int inner_offset;
831         __be16 frag_off;
832         u8 nexthdr;
833
834         reason = pskb_may_pull_reason(skb, sizeof(struct ipv6hdr));
835         if (reason != SKB_NOT_DROPPED_YET)
836                 goto out;
837
838         seg6_icmp_srh(skb, opt);
839
840         nexthdr = ((struct ipv6hdr *)skb->data)->nexthdr;
841         if (ipv6_ext_hdr(nexthdr)) {
842                 /* now skip over extension headers */
843                 inner_offset = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr),
844                                                 &nexthdr, &frag_off);
845                 if (inner_offset < 0) {
846                         SKB_DR_SET(reason, IPV6_BAD_EXTHDR);
847                         goto out;
848                 }
849         } else {
850                 inner_offset = sizeof(struct ipv6hdr);
851         }
852
853         /* Check that the header plus 8 bytes of the inner protocol header is present. */
854         reason = pskb_may_pull_reason(skb, inner_offset + 8);
855         if (reason != SKB_NOT_DROPPED_YET)
856                 goto out;
857
858         /* BUGGG_FUTURE: we should try to parse exthdrs in this packet.
859            Without this we will not be able, e.g., to do source routed
860            pmtu discovery.
861            Corresponding argument (opt) to notifiers is already added.
862            --ANK (980726)
863          */
864
865         ipprot = rcu_dereference(inet6_protos[nexthdr]);
866         if (ipprot && ipprot->err_handler)
867                 ipprot->err_handler(skb, opt, type, code, inner_offset, info);
868
869         raw6_icmp_error(skb, nexthdr, type, code, inner_offset, info);
870         return SKB_CONSUMED;
871
872 out:
873         __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
874         return reason;
875 }
876
877 /*
878  *      Handle icmp messages
879  */
880
881 static int icmpv6_rcv(struct sk_buff *skb)
882 {
883         enum skb_drop_reason reason = SKB_DROP_REASON_NOT_SPECIFIED;
884         struct net *net = dev_net(skb->dev);
885         struct net_device *dev = icmp6_dev(skb);
886         struct inet6_dev *idev = __in6_dev_get(dev);
887         const struct in6_addr *saddr, *daddr;
888         struct icmp6hdr *hdr;
889         u8 type;
890
891         if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
892                 struct sec_path *sp = skb_sec_path(skb);
893                 int nh;
894
895                 if (!(sp && sp->xvec[sp->len - 1]->props.flags &
896                                  XFRM_STATE_ICMP)) {
897                         reason = SKB_DROP_REASON_XFRM_POLICY;
898                         goto drop_no_count;
899                 }
900
901                 if (!pskb_may_pull(skb, sizeof(*hdr) + sizeof(struct ipv6hdr)))
902                         goto drop_no_count;
903
904                 nh = skb_network_offset(skb);
905                 skb_set_network_header(skb, sizeof(*hdr));
906
907                 if (!xfrm6_policy_check_reverse(NULL, XFRM_POLICY_IN,
908                                                 skb)) {
909                         reason = SKB_DROP_REASON_XFRM_POLICY;
910                         goto drop_no_count;
911                 }
912
913                 skb_set_network_header(skb, nh);
914         }
915
916         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INMSGS);
917
918         saddr = &ipv6_hdr(skb)->saddr;
919         daddr = &ipv6_hdr(skb)->daddr;
920
921         if (skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo)) {
922                 net_dbg_ratelimited("ICMPv6 checksum failed [%pI6c > %pI6c]\n",
923                                     saddr, daddr);
924                 goto csum_error;
925         }
926
927         if (!pskb_pull(skb, sizeof(*hdr)))
928                 goto discard_it;
929
930         hdr = icmp6_hdr(skb);
931
932         type = hdr->icmp6_type;
933
934         ICMP6MSGIN_INC_STATS(dev_net(dev), idev, type);
935
936         switch (type) {
937         case ICMPV6_ECHO_REQUEST:
938                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all)
939                         reason = icmpv6_echo_reply(skb);
940                 break;
941         case ICMPV6_EXT_ECHO_REQUEST:
942                 if (!net->ipv6.sysctl.icmpv6_echo_ignore_all &&
943                     READ_ONCE(net->ipv4.sysctl_icmp_echo_enable_probe))
944                         reason = icmpv6_echo_reply(skb);
945                 break;
946
947         case ICMPV6_ECHO_REPLY:
948                 reason = ping_rcv(skb);
949                 break;
950
951         case ICMPV6_EXT_ECHO_REPLY:
952                 reason = ping_rcv(skb);
953                 break;
954
955         case ICMPV6_PKT_TOOBIG:
956                 /* BUGGG_FUTURE: if the packet contains an rthdr, we cannot update
957                    the standard destination cache. It seems only an "advanced"
958                    destination cache would allow us to solve this problem.
959                    --ANK (980726)
960                  */
961                 if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
962                         goto discard_it;
963                 hdr = icmp6_hdr(skb);
964
965                 /* to notify */
966                 fallthrough;
967         case ICMPV6_DEST_UNREACH:
968         case ICMPV6_TIME_EXCEED:
969         case ICMPV6_PARAMPROB:
970                 reason = icmpv6_notify(skb, type, hdr->icmp6_code,
971                                        hdr->icmp6_mtu);
972                 break;
973
974         case NDISC_ROUTER_SOLICITATION:
975         case NDISC_ROUTER_ADVERTISEMENT:
976         case NDISC_NEIGHBOUR_SOLICITATION:
977         case NDISC_NEIGHBOUR_ADVERTISEMENT:
978         case NDISC_REDIRECT:
979                 reason = ndisc_rcv(skb);
980                 break;
981
982         case ICMPV6_MGM_QUERY:
983                 igmp6_event_query(skb);
984                 return 0;
985
986         case ICMPV6_MGM_REPORT:
987                 igmp6_event_report(skb);
988                 return 0;
989
990         case ICMPV6_MGM_REDUCTION:
991         case ICMPV6_NI_QUERY:
992         case ICMPV6_NI_REPLY:
993         case ICMPV6_MLD2_REPORT:
994         case ICMPV6_DHAAD_REQUEST:
995         case ICMPV6_DHAAD_REPLY:
996         case ICMPV6_MOBILE_PREFIX_SOL:
997         case ICMPV6_MOBILE_PREFIX_ADV:
998                 break;
999
1000         default:
1001                 /* informational */
1002                 if (type & ICMPV6_INFOMSG_MASK)
1003                         break;
1004
1005                 net_dbg_ratelimited("icmpv6: msg of unknown type [%pI6c > %pI6c]\n",
1006                                     saddr, daddr);
1007
1008                 /*
1009                  * error of unknown type;
1010                  * must pass it to the upper level
1011                  */
1012
1013                 reason = icmpv6_notify(skb, type, hdr->icmp6_code,
1014                                        hdr->icmp6_mtu);
1015         }
1016
1017         /* Until the v6 path can be better sorted out, assume failure and
1018          * preserve the status quo behaviour for the rest of the paths to here.
1019          */
1020         if (reason)
1021                 kfree_skb_reason(skb, reason);
1022         else
1023                 consume_skb(skb);
1024
1025         return 0;
1026
1027 csum_error:
1028         reason = SKB_DROP_REASON_ICMP_CSUM;
1029         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_CSUMERRORS);
1030 discard_it:
1031         __ICMP6_INC_STATS(dev_net(dev), idev, ICMP6_MIB_INERRORS);
1032 drop_no_count:
1033         kfree_skb_reason(skb, reason);
1034         return 0;
1035 }
1036
1037 void icmpv6_flow_init(const struct sock *sk, struct flowi6 *fl6, u8 type,
1038                       const struct in6_addr *saddr,
1039                       const struct in6_addr *daddr, int oif)
1040 {
1041         memset(fl6, 0, sizeof(*fl6));
1042         fl6->saddr = *saddr;
1043         fl6->daddr = *daddr;
1044         fl6->flowi6_proto       = IPPROTO_ICMPV6;
1045         fl6->fl6_icmp_type      = type;
1046         fl6->fl6_icmp_code      = 0;
1047         fl6->flowi6_oif         = oif;
1048         security_sk_classify_flow(sk, flowi6_to_flowi_common(fl6));
1049 }
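/*
 * icmpv6_flow_init() is a helper for callers that build ICMPv6 packets of
 * their own; the neighbour discovery code, for example, uses it to set up
 * the flowi6 for NS/NA/RS/RA transmissions before routing them.
 */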
1050
1051 int __init icmpv6_init(void)
1052 {
1053         struct sock *sk;
1054         int err, i;
1055
1056         for_each_possible_cpu(i) {
1057                 err = inet_ctl_sock_create(&sk, PF_INET6,
1058                                            SOCK_RAW, IPPROTO_ICMPV6, &init_net);
1059                 if (err < 0) {
1060                         pr_err("Failed to initialize the ICMP6 control socket (err %d)\n",
1061                                err);
1062                         return err;
1063                 }
1064
1065                 per_cpu(ipv6_icmp_sk, i) = sk;
1066
1067                 /* Enough space for 2 64K ICMP packets, including
1068                  * sk_buff struct overhead.
1069                  */
1070                 sk->sk_sndbuf = 2 * SKB_TRUESIZE(64 * 1024);
1071         }
1072
1073         err = -EAGAIN;
1074         if (inet6_add_protocol(&icmpv6_protocol, IPPROTO_ICMPV6) < 0)
1075                 goto fail;
1076
1077         err = inet6_register_icmp_sender(icmp6_send);
1078         if (err)
1079                 goto sender_reg_err;
1080         return 0;
1081
1082 sender_reg_err:
1083         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1084 fail:
1085         pr_err("Failed to register ICMP6 protocol\n");
1086         return err;
1087 }
1088
1089 void icmpv6_cleanup(void)
1090 {
1091         inet6_unregister_icmp_sender(icmp6_send);
1092         inet6_del_protocol(&icmpv6_protocol, IPPROTO_ICMPV6);
1093 }
1094
1095
1096 static const struct icmp6_err {
1097         int err;
1098         int fatal;
1099 } tab_unreach[] = {
1100         {       /* NOROUTE */
1101                 .err    = ENETUNREACH,
1102                 .fatal  = 0,
1103         },
1104         {       /* ADM_PROHIBITED */
1105                 .err    = EACCES,
1106                 .fatal  = 1,
1107         },
1108         {       /* Was NOT_NEIGHBOUR, now reserved */
1109                 .err    = EHOSTUNREACH,
1110                 .fatal  = 0,
1111         },
1112         {       /* ADDR_UNREACH */
1113                 .err    = EHOSTUNREACH,
1114                 .fatal  = 0,
1115         },
1116         {       /* PORT_UNREACH */
1117                 .err    = ECONNREFUSED,
1118                 .fatal  = 1,
1119         },
1120         {       /* POLICY_FAIL */
1121                 .err    = EACCES,
1122                 .fatal  = 1,
1123         },
1124         {       /* REJECT_ROUTE */
1125                 .err    = EACCES,
1126                 .fatal  = 1,
1127         },
1128 };
1129
1130 int icmpv6_err_convert(u8 type, u8 code, int *err)
1131 {
1132         int fatal = 0;
1133
1134         *err = EPROTO;
1135
1136         switch (type) {
1137         case ICMPV6_DEST_UNREACH:
1138                 fatal = 1;
1139                 if (code < ARRAY_SIZE(tab_unreach)) {
1140                         *err  = tab_unreach[code].err;
1141                         fatal = tab_unreach[code].fatal;
1142                 }
1143                 break;
1144
1145         case ICMPV6_PKT_TOOBIG:
1146                 *err = EMSGSIZE;
1147                 break;
1148
1149         case ICMPV6_PARAMPROB:
1150                 *err = EPROTO;
1151                 fatal = 1;
1152                 break;
1153
1154         case ICMPV6_TIME_EXCEED:
1155                 *err = EHOSTUNREACH;
1156                 break;
1157         }
1158
1159         return fatal;
1160 }
1161 EXPORT_SYMBOL(icmpv6_err_convert);
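/*
 * Illustrative sketch (not part of this file) of how a transport protocol
 * error handler might consume icmpv6_err_convert(); the helper name and
 * the report-only-fatal-errors policy are assumptions of the example.
 */
#if 0
static void example_report_icmpv6_error(struct sock *sk, u8 type, u8 code)
{
        int err;

        /* e.g. DEST_UNREACH/PORT_UNREACH maps to ECONNREFUSED and is fatal */
        if (icmpv6_err_convert(type, code, &err)) {
                sk->sk_err = err;
                sk_error_report(sk);
        }
}
#endif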
1162
1163 #ifdef CONFIG_SYSCTL
1164 static struct ctl_table ipv6_icmp_table_template[] = {
1165         {
1166                 .procname       = "ratelimit",
1167                 .data           = &init_net.ipv6.sysctl.icmpv6_time,
1168                 .maxlen         = sizeof(int),
1169                 .mode           = 0644,
1170                 .proc_handler   = proc_dointvec_ms_jiffies,
1171         },
1172         {
1173                 .procname       = "echo_ignore_all",
1174                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_all,
1175                 .maxlen         = sizeof(u8),
1176                 .mode           = 0644,
1177                 .proc_handler = proc_dou8vec_minmax,
1178         },
1179         {
1180                 .procname       = "echo_ignore_multicast",
1181                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_multicast,
1182                 .maxlen         = sizeof(u8),
1183                 .mode           = 0644,
1184                 .proc_handler = proc_dou8vec_minmax,
1185         },
1186         {
1187                 .procname       = "echo_ignore_anycast",
1188                 .data           = &init_net.ipv6.sysctl.icmpv6_echo_ignore_anycast,
1189                 .maxlen         = sizeof(u8),
1190                 .mode           = 0644,
1191                 .proc_handler = proc_dou8vec_minmax,
1192         },
1193         {
1194                 .procname       = "ratemask",
1195                 .data           = &init_net.ipv6.sysctl.icmpv6_ratemask_ptr,
1196                 .maxlen         = ICMPV6_MSG_MAX + 1,
1197                 .mode           = 0644,
1198                 .proc_handler = proc_do_large_bitmap,
1199         },
1200         {
1201                 .procname       = "error_anycast_as_unicast",
1202                 .data           = &init_net.ipv6.sysctl.icmpv6_error_anycast_as_unicast,
1203                 .maxlen         = sizeof(u8),
1204                 .mode           = 0644,
1205                 .proc_handler   = proc_dou8vec_minmax,
1206                 .extra1         = SYSCTL_ZERO,
1207                 .extra2         = SYSCTL_ONE,
1208         },
1209         { },
1210 };
1211
1212 struct ctl_table * __net_init ipv6_icmp_sysctl_init(struct net *net)
1213 {
1214         struct ctl_table *table;
1215
1216         table = kmemdup(ipv6_icmp_table_template,
1217                         sizeof(ipv6_icmp_table_template),
1218                         GFP_KERNEL);
1219
1220         if (table) {
1221                 table[0].data = &net->ipv6.sysctl.icmpv6_time;
1222                 table[1].data = &net->ipv6.sysctl.icmpv6_echo_ignore_all;
1223                 table[2].data = &net->ipv6.sysctl.icmpv6_echo_ignore_multicast;
1224                 table[3].data = &net->ipv6.sysctl.icmpv6_echo_ignore_anycast;
1225                 table[4].data = &net->ipv6.sysctl.icmpv6_ratemask_ptr;
1226                 table[5].data = &net->ipv6.sysctl.icmpv6_error_anycast_as_unicast;
1227         }
1228         return table;
1229 }
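/*
 * Per-netns sysctl setup: the template above is duplicated and each .data
 * pointer is repointed at the fields of the given struct net.  The numeric
 * indices must match the order of entries in ipv6_icmp_table_template, so
 * a new entry has to be wired up in both places.
 */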
1230
1231 size_t ipv6_icmp_sysctl_table_size(void)
1232 {
1233         return ARRAY_SIZE(ipv6_icmp_table_template);
1234 }
1235 #endif