1 // SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (c) 2008 Patrick McHardy <kaber@trash.net>
 *
 * Development of this code funded by Astaro AG (http://www.astaro.com/)
 */
8 #include <asm/unaligned.h>
9 #include <linux/kernel.h>
10 #include <linux/netlink.h>
11 #include <linux/netfilter.h>
12 #include <linux/netfilter/nf_tables.h>
13 #include <linux/dccp.h>
14 #include <linux/sctp.h>
15 #include <net/netfilter/nf_tables_core.h>
16 #include <net/netfilter/nf_tables.h>
29 static unsigned int optlen(const u8 *opt, unsigned int offset)
31 /* Beware zero-length options: make finite progress */
32 if (opt[offset] <= TCPOPT_NOP || opt[offset + 1] == 0)
35 return opt[offset + 1];
38 static void nft_exthdr_ipv6_eval(const struct nft_expr *expr,
39 struct nft_regs *regs,
40 const struct nft_pktinfo *pkt)
42 struct nft_exthdr *priv = nft_expr_priv(expr);
43 u32 *dest = ®s->data[priv->dreg];
44 unsigned int offset = 0;
47 if (pkt->skb->protocol != htons(ETH_P_IPV6))
50 err = ipv6_find_hdr(pkt->skb, &offset, priv->type, NULL, NULL);
51 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
52 nft_reg_store8(dest, err >= 0);
57 offset += priv->offset;
59 dest[priv->len / NFT_REG32_SIZE] = 0;
60 if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
64 regs->verdict.code = NFT_BREAK;
67 /* find the offset to specified option.
69 * If target header is found, its offset is set in *offset and return option
70 * number. Otherwise, return negative error.
72 * If the first fragment doesn't contain the End of Options it is considered
75 static int ipv4_find_option(struct net *net, struct sk_buff *skb,
76 unsigned int *offset, int target)
78 unsigned char optbuf[sizeof(struct ip_options) + 40];
79 struct ip_options *opt = (struct ip_options *)optbuf;
80 struct iphdr *iph, _iph;
86 iph = skb_header_pointer(skb, 0, sizeof(_iph), &_iph);
89 start = sizeof(struct iphdr);
91 optlen = iph->ihl * 4 - (int)sizeof(struct iphdr);
95 memset(opt, 0, sizeof(struct ip_options));
96 /* Copy the options since __ip_options_compile() modifies
99 if (skb_copy_bits(skb, start, opt->__data, optlen))
101 opt->optlen = optlen;
103 if (__ip_options_compile(net, opt, NULL, &info))
111 found = target == IPOPT_SSRR ? opt->is_strictroute :
112 !opt->is_strictroute;
114 *offset = opt->srr + start;
119 *offset = opt->rr + start;
123 if (!opt->router_alert)
125 *offset = opt->router_alert + start;
131 return found ? target : -ENOENT;
134 static void nft_exthdr_ipv4_eval(const struct nft_expr *expr,
135 struct nft_regs *regs,
136 const struct nft_pktinfo *pkt)
138 struct nft_exthdr *priv = nft_expr_priv(expr);
139 u32 *dest = ®s->data[priv->dreg];
140 struct sk_buff *skb = pkt->skb;
144 if (skb->protocol != htons(ETH_P_IP))
147 err = ipv4_find_option(nft_net(pkt), skb, &offset, priv->type);
148 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
149 nft_reg_store8(dest, err >= 0);
151 } else if (err < 0) {
154 offset += priv->offset;
156 dest[priv->len / NFT_REG32_SIZE] = 0;
157 if (skb_copy_bits(pkt->skb, offset, dest, priv->len) < 0)
161 regs->verdict.code = NFT_BREAK;
165 nft_tcp_header_pointer(const struct nft_pktinfo *pkt,
166 unsigned int len, void *buffer, unsigned int *tcphdr_len)
170 if (pkt->tprot != IPPROTO_TCP || pkt->fragoff)
173 tcph = skb_header_pointer(pkt->skb, nft_thoff(pkt), sizeof(*tcph), buffer);
177 *tcphdr_len = __tcp_hdrlen(tcph);
178 if (*tcphdr_len < sizeof(*tcph) || *tcphdr_len > len)
181 return skb_header_pointer(pkt->skb, nft_thoff(pkt), *tcphdr_len, buffer);
184 static void nft_exthdr_tcp_eval(const struct nft_expr *expr,
185 struct nft_regs *regs,
186 const struct nft_pktinfo *pkt)
188 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
189 struct nft_exthdr *priv = nft_expr_priv(expr);
190 unsigned int i, optl, tcphdr_len, offset;
191 u32 *dest = ®s->data[priv->dreg];
195 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
200 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
201 optl = optlen(opt, i);
203 if (priv->type != opt[i])
206 if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
209 offset = i + priv->offset;
210 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
213 dest[priv->len / NFT_REG32_SIZE] = 0;
214 memcpy(dest, opt + offset, priv->len);
221 if (priv->flags & NFT_EXTHDR_F_PRESENT)
224 regs->verdict.code = NFT_BREAK;
227 static void nft_exthdr_tcp_set_eval(const struct nft_expr *expr,
228 struct nft_regs *regs,
229 const struct nft_pktinfo *pkt)
231 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
232 struct nft_exthdr *priv = nft_expr_priv(expr);
233 unsigned int i, optl, tcphdr_len, offset;
237 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
242 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
248 optl = optlen(opt, i);
250 if (priv->type != opt[i])
253 if (i + optl > tcphdr_len || priv->len + priv->offset > optl)
256 if (skb_ensure_writable(pkt->skb,
257 nft_thoff(pkt) + i + priv->len))
260 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff,
265 offset = i + priv->offset;
269 old.v16 = (__force __be16)get_unaligned((u16 *)(opt + offset));
270 new.v16 = (__force __be16)nft_reg_load16(
271 ®s->data[priv->sreg]);
273 switch (priv->type) {
275 /* increase can cause connection to stall */
276 if (ntohs(old.v16) <= ntohs(new.v16))
281 if (old.v16 == new.v16)
284 put_unaligned(new.v16, (__be16*)(opt + offset));
285 inet_proto_csum_replace2(&tcph->check, pkt->skb,
286 old.v16, new.v16, false);
289 new.v32 = nft_reg_load_be32(®s->data[priv->sreg]);
290 old.v32 = (__force __be32)get_unaligned((u32 *)(opt + offset));
292 if (old.v32 == new.v32)
295 put_unaligned(new.v32, (__be32*)(opt + offset));
296 inet_proto_csum_replace4(&tcph->check, pkt->skb,
297 old.v32, new.v32, false);
308 regs->verdict.code = NFT_BREAK;
311 static void nft_exthdr_tcp_strip_eval(const struct nft_expr *expr,
312 struct nft_regs *regs,
313 const struct nft_pktinfo *pkt)
315 u8 buff[sizeof(struct tcphdr) + MAX_TCP_OPTION_SPACE];
316 struct nft_exthdr *priv = nft_expr_priv(expr);
317 unsigned int i, tcphdr_len, optl;
321 tcph = nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
325 if (skb_ensure_writable(pkt->skb, nft_thoff(pkt) + tcphdr_len))
328 opt = (u8 *)nft_tcp_header_pointer(pkt, sizeof(buff), buff, &tcphdr_len);
331 for (i = sizeof(*tcph); i < tcphdr_len - 1; i += optl) {
334 optl = optlen(opt, i);
335 if (priv->type != opt[i])
338 if (i + optl > tcphdr_len)
341 for (j = 0; j < optl; ++j) {
345 if ((i + j) % 2 == 0) {
349 inet_proto_csum_replace2(&tcph->check, pkt->skb, htons(o),
352 memset(opt + i, TCPOPT_NOP, optl);
356 /* option not found, continue. This allows to do multiple
357 * option removals per rule.
361 regs->verdict.code = NFT_BREAK;
364 /* can't remove, no choice but to drop */
365 regs->verdict.code = NF_DROP;
368 static void nft_exthdr_sctp_eval(const struct nft_expr *expr,
369 struct nft_regs *regs,
370 const struct nft_pktinfo *pkt)
372 unsigned int offset = nft_thoff(pkt) + sizeof(struct sctphdr);
373 struct nft_exthdr *priv = nft_expr_priv(expr);
374 u32 *dest = ®s->data[priv->dreg];
375 const struct sctp_chunkhdr *sch;
376 struct sctp_chunkhdr _sch;
378 if (pkt->tprot != IPPROTO_SCTP)
382 sch = skb_header_pointer(pkt->skb, offset, sizeof(_sch), &_sch);
383 if (!sch || !sch->length)
386 if (sch->type == priv->type) {
387 if (priv->flags & NFT_EXTHDR_F_PRESENT) {
388 nft_reg_store8(dest, true);
391 if (priv->offset + priv->len > ntohs(sch->length) ||
392 offset + ntohs(sch->length) > pkt->skb->len)
395 dest[priv->len / NFT_REG32_SIZE] = 0;
396 if (skb_copy_bits(pkt->skb, offset + priv->offset,
397 dest, priv->len) < 0)
401 offset += SCTP_PAD4(ntohs(sch->length));
402 } while (offset < pkt->skb->len);
404 if (priv->flags & NFT_EXTHDR_F_PRESENT)
405 nft_reg_store8(dest, false);
407 regs->verdict.code = NFT_BREAK;
410 static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
411 struct nft_regs *regs,
412 const struct nft_pktinfo *pkt)
414 struct nft_exthdr *priv = nft_expr_priv(expr);
415 unsigned int thoff, dataoff, optoff, optlen, i;
416 u32 *dest = ®s->data[priv->dreg];
417 const struct dccp_hdr *dh;
420 if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
423 thoff = nft_thoff(pkt);
425 dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
429 dataoff = dh->dccph_doff * sizeof(u32);
430 optoff = __dccp_hdr_len(dh);
431 if (dataoff <= optoff)
434 optlen = dataoff - optoff;
436 for (i = 0; i < optlen; ) {
437 /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
438 * the length; the remaining options are at least 2B long. In
439 * all cases, the first byte contains the option type. In
440 * multi-byte options, the second byte contains the option
441 * length, which must be at least two: 1 for the type plus 1 for
442 * the length plus 0-253 for any following option data. We
443 * aren't interested in the option data, only the type and the
444 * length, so we don't need to read more than two bytes at a
447 unsigned int buflen = optlen - i;
451 if (buflen > sizeof(buf))
452 buflen = sizeof(buf);
454 bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
461 if (type == priv->type) {
466 if (type <= DCCPO_MAX_RESERVED) {
486 static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
487 [NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
488 [NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
489 [NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
490 [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
491 [NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
492 [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255),
493 [NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
496 static int nft_exthdr_init(const struct nft_ctx *ctx,
497 const struct nft_expr *expr,
498 const struct nlattr * const tb[])
500 struct nft_exthdr *priv = nft_expr_priv(expr);
501 u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
504 if (!tb[NFTA_EXTHDR_DREG] ||
505 !tb[NFTA_EXTHDR_TYPE] ||
506 !tb[NFTA_EXTHDR_OFFSET] ||
507 !tb[NFTA_EXTHDR_LEN])
510 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
514 err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
518 if (tb[NFTA_EXTHDR_FLAGS]) {
519 err = nft_parse_u32_check(tb[NFTA_EXTHDR_FLAGS], U8_MAX, &flags);
523 if (flags & ~NFT_EXTHDR_F_PRESENT)
527 if (tb[NFTA_EXTHDR_OP]) {
528 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
533 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
534 priv->offset = offset;
539 return nft_parse_register_store(ctx, tb[NFTA_EXTHDR_DREG],
540 &priv->dreg, NULL, NFT_DATA_VALUE,
544 static int nft_exthdr_tcp_set_init(const struct nft_ctx *ctx,
545 const struct nft_expr *expr,
546 const struct nlattr * const tb[])
548 struct nft_exthdr *priv = nft_expr_priv(expr);
549 u32 offset, len, flags = 0, op = NFT_EXTHDR_OP_IPV6;
552 if (!tb[NFTA_EXTHDR_SREG] ||
553 !tb[NFTA_EXTHDR_TYPE] ||
554 !tb[NFTA_EXTHDR_OFFSET] ||
555 !tb[NFTA_EXTHDR_LEN])
558 if (tb[NFTA_EXTHDR_DREG] || tb[NFTA_EXTHDR_FLAGS])
561 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OFFSET], U8_MAX, &offset);
565 err = nft_parse_u32_check(tb[NFTA_EXTHDR_LEN], U8_MAX, &len);
579 err = nft_parse_u32_check(tb[NFTA_EXTHDR_OP], U8_MAX, &op);
583 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
584 priv->offset = offset;
589 return nft_parse_register_load(tb[NFTA_EXTHDR_SREG], &priv->sreg,
593 static int nft_exthdr_tcp_strip_init(const struct nft_ctx *ctx,
594 const struct nft_expr *expr,
595 const struct nlattr * const tb[])
597 struct nft_exthdr *priv = nft_expr_priv(expr);
599 if (tb[NFTA_EXTHDR_SREG] ||
600 tb[NFTA_EXTHDR_DREG] ||
601 tb[NFTA_EXTHDR_FLAGS] ||
602 tb[NFTA_EXTHDR_OFFSET] ||
606 if (!tb[NFTA_EXTHDR_TYPE])
609 priv->type = nla_get_u8(tb[NFTA_EXTHDR_TYPE]);
610 priv->op = NFT_EXTHDR_OP_TCPOPT;
615 static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
616 const struct nft_expr *expr,
617 const struct nlattr * const tb[])
619 struct nft_exthdr *priv = nft_expr_priv(expr);
620 int err = nft_exthdr_init(ctx, expr, tb);
625 switch (priv->type) {
637 static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
638 const struct nft_expr *expr,
639 const struct nlattr * const tb[])
641 struct nft_exthdr *priv = nft_expr_priv(expr);
642 int err = nft_exthdr_init(ctx, expr, tb);
647 if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
653 static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
655 if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
656 goto nla_put_failure;
657 if (nla_put_be32(skb, NFTA_EXTHDR_OFFSET, htonl(priv->offset)))
658 goto nla_put_failure;
659 if (nla_put_be32(skb, NFTA_EXTHDR_LEN, htonl(priv->len)))
660 goto nla_put_failure;
661 if (nla_put_be32(skb, NFTA_EXTHDR_FLAGS, htonl(priv->flags)))
662 goto nla_put_failure;
663 if (nla_put_be32(skb, NFTA_EXTHDR_OP, htonl(priv->op)))
664 goto nla_put_failure;
671 static int nft_exthdr_dump(struct sk_buff *skb,
672 const struct nft_expr *expr, bool reset)
674 const struct nft_exthdr *priv = nft_expr_priv(expr);
676 if (nft_dump_register(skb, NFTA_EXTHDR_DREG, priv->dreg))
679 return nft_exthdr_dump_common(skb, priv);
682 static int nft_exthdr_dump_set(struct sk_buff *skb,
683 const struct nft_expr *expr, bool reset)
685 const struct nft_exthdr *priv = nft_expr_priv(expr);
687 if (nft_dump_register(skb, NFTA_EXTHDR_SREG, priv->sreg))
690 return nft_exthdr_dump_common(skb, priv);
693 static int nft_exthdr_dump_strip(struct sk_buff *skb,
694 const struct nft_expr *expr, bool reset)
696 const struct nft_exthdr *priv = nft_expr_priv(expr);
698 return nft_exthdr_dump_common(skb, priv);
701 static bool nft_exthdr_reduce(struct nft_regs_track *track,
702 const struct nft_expr *expr)
704 const struct nft_exthdr *priv = nft_expr_priv(expr);
705 const struct nft_exthdr *exthdr;
707 if (!nft_reg_track_cmp(track, expr, priv->dreg)) {
708 nft_reg_track_update(track, expr, priv->dreg, priv->len);
712 exthdr = nft_expr_priv(track->regs[priv->dreg].selector);
713 if (priv->type != exthdr->type ||
714 priv->op != exthdr->op ||
715 priv->flags != exthdr->flags ||
716 priv->offset != exthdr->offset ||
717 priv->len != exthdr->len) {
718 nft_reg_track_update(track, expr, priv->dreg, priv->len);
722 if (!track->regs[priv->dreg].bitwise)
725 return nft_expr_reduce_bitwise(track, expr);
728 static const struct nft_expr_ops nft_exthdr_ipv6_ops = {
729 .type = &nft_exthdr_type,
730 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
731 .eval = nft_exthdr_ipv6_eval,
732 .init = nft_exthdr_init,
733 .dump = nft_exthdr_dump,
734 .reduce = nft_exthdr_reduce,
737 static const struct nft_expr_ops nft_exthdr_ipv4_ops = {
738 .type = &nft_exthdr_type,
739 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
740 .eval = nft_exthdr_ipv4_eval,
741 .init = nft_exthdr_ipv4_init,
742 .dump = nft_exthdr_dump,
743 .reduce = nft_exthdr_reduce,
746 static const struct nft_expr_ops nft_exthdr_tcp_ops = {
747 .type = &nft_exthdr_type,
748 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
749 .eval = nft_exthdr_tcp_eval,
750 .init = nft_exthdr_init,
751 .dump = nft_exthdr_dump,
752 .reduce = nft_exthdr_reduce,
755 static const struct nft_expr_ops nft_exthdr_tcp_set_ops = {
756 .type = &nft_exthdr_type,
757 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
758 .eval = nft_exthdr_tcp_set_eval,
759 .init = nft_exthdr_tcp_set_init,
760 .dump = nft_exthdr_dump_set,
761 .reduce = NFT_REDUCE_READONLY,
764 static const struct nft_expr_ops nft_exthdr_tcp_strip_ops = {
765 .type = &nft_exthdr_type,
766 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
767 .eval = nft_exthdr_tcp_strip_eval,
768 .init = nft_exthdr_tcp_strip_init,
769 .dump = nft_exthdr_dump_strip,
770 .reduce = NFT_REDUCE_READONLY,
773 static const struct nft_expr_ops nft_exthdr_sctp_ops = {
774 .type = &nft_exthdr_type,
775 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
776 .eval = nft_exthdr_sctp_eval,
777 .init = nft_exthdr_init,
778 .dump = nft_exthdr_dump,
779 .reduce = nft_exthdr_reduce,
782 static const struct nft_expr_ops nft_exthdr_dccp_ops = {
783 .type = &nft_exthdr_type,
784 .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
785 .eval = nft_exthdr_dccp_eval,
786 .init = nft_exthdr_dccp_init,
787 .dump = nft_exthdr_dump,
788 .reduce = nft_exthdr_reduce,
791 static const struct nft_expr_ops *
792 nft_exthdr_select_ops(const struct nft_ctx *ctx,
793 const struct nlattr * const tb[])
797 if (!tb[NFTA_EXTHDR_OP])
798 return &nft_exthdr_ipv6_ops;
800 if (tb[NFTA_EXTHDR_SREG] && tb[NFTA_EXTHDR_DREG])
801 return ERR_PTR(-EOPNOTSUPP);
803 op = ntohl(nla_get_be32(tb[NFTA_EXTHDR_OP]));
805 case NFT_EXTHDR_OP_TCPOPT:
806 if (tb[NFTA_EXTHDR_SREG])
807 return &nft_exthdr_tcp_set_ops;
808 if (tb[NFTA_EXTHDR_DREG])
809 return &nft_exthdr_tcp_ops;
810 return &nft_exthdr_tcp_strip_ops;
811 case NFT_EXTHDR_OP_IPV6:
812 if (tb[NFTA_EXTHDR_DREG])
813 return &nft_exthdr_ipv6_ops;
815 case NFT_EXTHDR_OP_IPV4:
816 if (ctx->family != NFPROTO_IPV6) {
817 if (tb[NFTA_EXTHDR_DREG])
818 return &nft_exthdr_ipv4_ops;
821 case NFT_EXTHDR_OP_SCTP:
822 if (tb[NFTA_EXTHDR_DREG])
823 return &nft_exthdr_sctp_ops;
825 case NFT_EXTHDR_OP_DCCP:
826 if (tb[NFTA_EXTHDR_DREG])
827 return &nft_exthdr_dccp_ops;
831 return ERR_PTR(-EOPNOTSUPP);
834 struct nft_expr_type nft_exthdr_type __read_mostly = {
836 .select_ops = nft_exthdr_select_ops,
837 .policy = nft_exthdr_policy,
838 .maxattr = NFTA_EXTHDR_MAX,
839 .owner = THIS_MODULE,