2 * Copyright (c) 2017 Mellanox Technologies. All rights reserved.
4 * This software is available to you under a choice of one of two
5 * licenses. You may choose to be licensed under the terms of the GNU
6 * General Public License (GPL) Version 2, available from the file
7 * COPYING in the main directory of this source tree, or the
8 * OpenIB.org BSD license below:
10 * Redistribution and use in source and binary forms, with or
11 * without modification, are permitted provided that the following
14 * - Redistributions of source code must retain the above
15 * copyright notice, this list of conditions and the following
18 * - Redistributions in binary form must reproduce the above
19 * copyright notice, this list of conditions and the following
20 * disclaimer in the documentation and/or other materials
21 * provided with the distribution.
23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
34 #include <crypto/internal/geniv.h>
35 #include <crypto/aead.h>
36 #include <linux/inetdevice.h>
37 #include <linux/netdevice.h>
38 #include <net/netevent.h>
43 #include "ipsec_rxtx.h"
46 #define MLX5_IPSEC_RESCHED msecs_to_jiffies(1000)
47 #define MLX5E_IPSEC_TUNNEL_SA XA_MARK_1
/* Recover the driver's SA bookkeeping struct that mlx5e_xfrm_add_state()
 * stashed in the xfrm_state's offload handle.
 */
49 static struct mlx5e_ipsec_sa_entry *to_ipsec_sa_entry(struct xfrm_state *x)
51 return (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
/* Recover the driver's policy bookkeeping struct that mlx5e_xfrm_add_policy()
 * stashed in the xfrm_policy's offload handle.
 */
54 static struct mlx5e_ipsec_pol_entry *to_ipsec_pol_entry(struct xfrm_policy *x)
56 return (struct mlx5e_ipsec_pol_entry *)x->xdo.offload_handle;
/* Delayed-work handler: poll a TX packet-offload SA for lifetime expiry.
 * Under the state lock it asks the XFRM core to re-evaluate the lifetimes;
 * once the state is EXPIRED it latches attrs.drop and pushes the updated
 * attrs to hardware so further packets are dropped, otherwise it re-arms
 * itself.
 * NOTE(review): the excerpt is missing lines here (e.g. the early return
 * when attrs.drop is already set and the reschedule interval argument) —
 * the visible control flow is incomplete.
 */
59 static void mlx5e_ipsec_handle_tx_limit(struct work_struct *_work)
61 struct mlx5e_ipsec_dwork *dwork =
62 container_of(_work, struct mlx5e_ipsec_dwork, dwork.work);
63 struct mlx5e_ipsec_sa_entry *sa_entry = dwork->sa_entry;
64 struct xfrm_state *x = sa_entry->x;
66 if (sa_entry->attrs.drop)
69 spin_lock_bh(&x->lock);
70 xfrm_state_check_expire(x);
71 if (x->km.state == XFRM_STATE_EXPIRED) {
72 sa_entry->attrs.drop = true;
73 spin_unlock_bh(&x->lock);
/* Push the drop flag into the flow-steering/ASO context. */
75 mlx5e_accel_ipsec_fs_modify(sa_entry);
78 spin_unlock_bh(&x->lock);
/* Not expired yet: check again after MLX5_IPSEC_RESCHED (presumably). */
80 queue_delayed_work(sa_entry->ipsec->wq, &dwork->dwork,
/* Refresh the cached ESN (extended sequence number) snapshot for an SA.
 * Picks the live seq/seq_hi (RX) or oseq/oseq_hi (TX) from the replay
 * state depending on offload type/direction, recomputes the high bits for
 * crypto offload via xfrm_replay_seqhi(), and toggles the "overlap" flag
 * when the window bottom crosses MLX5E_IPSEC_ESN_SCOPE_MID.
 * Returns whether hardware needs to be updated — TODO confirm; return
 * statements are among the lines missing from this excerpt.
 */
84 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
86 struct xfrm_state *x = sa_entry->x;
91 switch (x->xso.type) {
92 case XFRM_DEV_OFFLOAD_PACKET:
/* Packet offload: pick per-direction counters maintained by XFRM. */
94 case XFRM_DEV_OFFLOAD_IN:
95 esn = x->replay_esn->seq;
96 esn_msb = x->replay_esn->seq_hi;
98 case XFRM_DEV_OFFLOAD_OUT:
99 esn = x->replay_esn->oseq;
100 esn_msb = x->replay_esn->oseq_hi;
107 case XFRM_DEV_OFFLOAD_CRYPTO:
108 /* Already parsed by XFRM core */
109 esn = x->replay_esn->seq;
116 overlap = sa_entry->esn_state.overlap;
/* Bottom of the anti-replay window; stays 0 while esn is inside the
 * first window.
 */
118 if (esn >= x->replay_esn->replay_window)
119 seq_bottom = esn - x->replay_esn->replay_window + 1;
121 if (x->xso.type == XFRM_DEV_OFFLOAD_CRYPTO)
122 esn_msb = xfrm_replay_seqhi(x, htonl(seq_bottom));
124 sa_entry->esn_state.esn = esn;
125 sa_entry->esn_state.esn_msb = esn_msb;
/* Overlap tracks which half of the 32-bit sequence space we are in so
 * the ESN MSB handover can be detected.
 */
127 if (unlikely(overlap && seq_bottom < MLX5E_IPSEC_ESN_SCOPE_MID)) {
128 sa_entry->esn_state.overlap = 0;
130 } else if (unlikely(!overlap &&
131 (seq_bottom >= MLX5E_IPSEC_ESN_SCOPE_MID))) {
132 sa_entry->esn_state.overlap = 1;
/* Translate XFRM packet lifetime limits into the hardware counter model.
 * The device uses a 32-bit down-counter that is periodically re-armed with
 * +2^31 by an ASO operation; this function derives the initial counter
 * value and the number of 2^31 "rounds" for both the hard and soft limits
 * (see the in-body comments for the exact system of equations).
 * NOTE(review): several lines (early returns for XFRM_INF, the n
 * adjustment branches) are missing from this excerpt.
 */
139 static void mlx5e_ipsec_init_limits(struct mlx5e_ipsec_sa_entry *sa_entry,
140 struct mlx5_accel_esp_xfrm_attrs *attrs)
142 struct xfrm_state *x = sa_entry->x;
145 attrs->lft.hard_packet_limit = x->lft.hard_packet_limit;
146 attrs->lft.soft_packet_limit = x->lft.soft_packet_limit;
147 if (x->lft.soft_packet_limit == XFRM_INF)
150 /* Compute hard limit initial value and number of rounds.
152 * The counting pattern of hardware counter goes:
154 * 2^31 | (2^31-1) -> 2^31-1
155 * 2^31 | (2^31-1) -> 2^31-1
157 * 2^31 | (2^31-1) -> 0
159 * The pattern is created by using an ASO operation to atomically set
160 * bit 31 after the down counter clears bit 31. This is effectively an
161 * atomic addition of 2**31 to the counter.
163 * We wish to configure the counter, within the above pattern, so that
164 * when it reaches 0, it has hit the hard limit. This is defined by this
165 * system of equations:
167 * hard_limit == start_value + n * 2^31
169 * start_value < 2^32, start_value >= 0
171 * These equations are not single-solution, there are often two choices:
172 * hard_limit == start_value + n * 2^31
173 * hard_limit == (start_value+2^31) + (n-1) * 2^31
175 * The algorithm selects the solution that keeps the counter value
176 * above 2^31 until the final iteration.
179 /* Start by estimating n and compute start_value */
180 n = attrs->lft.hard_packet_limit / BIT_ULL(31);
181 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
183 /* Choose the best of the two solutions: */
187 /* Computed values solve the system of equations: */
188 start_value = attrs->lft.hard_packet_limit - n * BIT_ULL(31);
190 /* The best solution means: when there are multiple iterations we must
191 * start above 2^31 and count down to 2**31 to get the interrupt.
193 attrs->lft.hard_packet_limit = lower_32_bits(start_value);
194 attrs->lft.numb_rounds_hard = (u64)n;
196 /* Compute soft limit initial value and number of rounds.
198 * The soft_limit is achieved by adjusting the counter's
199 * interrupt_value. This is embedded in the counting pattern created by
200 * hard packet calculations above.
202 * We wish to compute the interrupt_value for the soft_limit. This is
203 * defined by this system of equations:
205 * soft_limit == start_value - soft_value + n * 2^31
207 * soft_value < 2^32, soft_value >= 0
208 * for n == 0 start_value > soft_value
210 * As with compute_hard_n_value() the equations are not single-solution.
211 * The algorithm selects the solution that has:
212 * 2^30 <= soft_limit < 2^31 + 2^30
213 * for the interior iterations, which guarantees a large guard band
214 * around the counter hard limit and next interrupt.
217 /* Start by estimating n and compute soft_value */
218 n = (x->lft.soft_packet_limit - attrs->lft.hard_packet_limit) / BIT_ULL(31);
219 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) -
220 x->lft.soft_packet_limit;
222 /* Compare against constraints and adjust n */
225 else if (start_value >= BIT_ULL(32))
227 else if (start_value < 0)
230 /* Choose the best of the two solutions: */
231 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
232 if (n != attrs->lft.numb_rounds_hard && start_value < BIT_ULL(30))
235 /* Note that the upper limit of soft_value happens naturally because we
236 * always select the lowest soft_value.
239 /* Computed values solve the system of equations: */
240 start_value = attrs->lft.hard_packet_limit + n * BIT_ULL(31) - start_value;
242 /* The best solution means: when there are multiple iterations we must
243 * not fall below 2^30 as that would get too close to the false
244 * hard_limit and when we reach an interior iteration for soft_limit it
245 * has to be far away from 2**32-1 which is the counter reset point
246 * after the +2^31 to accommodate latency.
248 attrs->lft.soft_packet_limit = lower_32_bits(start_value);
249 attrs->lft.numb_rounds_soft = (u64)n;
/* Resolve the L2 addresses needed for tunnel-mode packet offload.
 * Only applies to tunnel + packet-offload SAs. The local port MAC is used
 * for one side and a neighbour lookup (ARP table) on the tunnel peer IP
 * fills the other; neigh_event_send() kicks off resolution if the entry
 * is not yet valid. Which of smac/dmac each value lands in depends on
 * direction — the assignment lines are among those missing from this
 * excerpt, so treat the exact mapping as unverified here.
 */
252 static void mlx5e_ipsec_init_macs(struct mlx5e_ipsec_sa_entry *sa_entry,
253 struct mlx5_accel_esp_xfrm_attrs *attrs)
255 struct mlx5_core_dev *mdev = mlx5e_ipsec_sa2dev(sa_entry);
256 struct xfrm_state *x = sa_entry->x;
257 struct net_device *netdev;
263 if (attrs->mode != XFRM_MODE_TUNNEL ||
264 attrs->type != XFRM_DEV_OFFLOAD_PACKET)
267 netdev = x->xso.real_dev;
269 mlx5_query_mac_address(mdev, addr);
270 switch (attrs->dir) {
271 case XFRM_DEV_OFFLOAD_IN:
/* RX: neighbour key is the remote (source) tunnel address. */
274 pkey = &attrs->saddr.a4;
276 case XFRM_DEV_OFFLOAD_OUT:
/* TX: neighbour key is the remote (destination) tunnel address. */
279 pkey = &attrs->daddr.a4;
285 ether_addr_copy(src, addr);
286 n = neigh_lookup(&arp_tbl, pkey, netdev);
288 n = neigh_create(&arp_tbl, pkey, netdev);
/* Trigger ARP resolution; the netevent notifier updates the SA later. */
291 neigh_event_send(n, NULL);
294 neigh_ha_snapshot(addr, n, netdev);
295 ether_addr_copy(dst, addr);
/* Flatten an xfrm_state into the driver's mlx5_accel_esp_xfrm_attrs.
 * Copies the AES-GCM key material (key + trailing 4-byte salt), IV seed
 * from the geniv context, ESN snapshot, SPI, addresses, selector ports,
 * mode/type/reqid, then derives the lifetime-counter setup and tunnel
 * MACs via the init helpers. The ESP-in-UDP encap ports at the end are
 * only meaningful when x->encap is set (the guarding lines are missing
 * from this excerpt).
 */
300 void mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
301 struct mlx5_accel_esp_xfrm_attrs *attrs)
303 struct xfrm_state *x = sa_entry->x;
304 struct aes_gcm_keymat *aes_gcm = &attrs->aes_gcm;
305 struct aead_geniv_ctx *geniv_ctx;
306 struct crypto_aead *aead;
307 unsigned int crypto_data_len, key_len;
310 memset(attrs, 0, sizeof(*attrs));
/* alg_key_len is in bits; the last 4 bytes of the blob are the salt. */
313 crypto_data_len = (x->aead->alg_key_len + 7) / 8;
314 key_len = crypto_data_len - 4; /* 4 bytes salt at end */
316 memcpy(aes_gcm->aes_key, x->aead->alg_key, key_len);
317 aes_gcm->key_len = key_len * 8;
319 /* salt and seq_iv */
321 geniv_ctx = crypto_aead_ctx(aead);
322 ivsize = crypto_aead_ivsize(aead);
323 memcpy(&aes_gcm->seq_iv, &geniv_ctx->salt, ivsize);
324 memcpy(&aes_gcm->salt, x->aead->alg_key + key_len,
325 sizeof(aes_gcm->salt));
327 attrs->authsize = crypto_aead_authsize(aead) / 4; /* in dwords */
330 aes_gcm->icv_len = x->aead->alg_icv_len;
/* ESN: publish the snapshot cached by mlx5e_ipsec_update_esn_state(). */
333 if (x->props.flags & XFRM_STATE_ESN) {
334 attrs->replay_esn.trigger = true;
335 attrs->replay_esn.esn = sa_entry->esn_state.esn;
336 attrs->replay_esn.esn_msb = sa_entry->esn_state.esn_msb;
337 attrs->replay_esn.overlap = sa_entry->esn_state.overlap;
340 attrs->dir = x->xso.dir;
342 attrs->spi = be32_to_cpu(x->id.spi);
344 /* source , destination ips */
345 memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
346 memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
347 attrs->family = x->props.family;
348 attrs->type = x->xso.type;
349 attrs->reqid = x->props.reqid;
/* Selector ports arrive in network byte order; attrs keep host order. */
350 attrs->upspec.dport = ntohs(x->sel.dport);
351 attrs->upspec.dport_mask = ntohs(x->sel.dport_mask);
352 attrs->upspec.sport = ntohs(x->sel.sport);
353 attrs->upspec.sport_mask = ntohs(x->sel.sport_mask);
354 attrs->upspec.proto = x->sel.proto;
355 attrs->mode = x->props.mode;
357 mlx5e_ipsec_init_limits(sa_entry, attrs);
358 mlx5e_ipsec_init_macs(sa_entry, attrs);
/* ESP-in-UDP encapsulation ports — presumably guarded by x->encap. */
362 attrs->sport = x->encap->encap_sport;
363 attrs->dport = x->encap->encap_dport;
/* Validate that an xfrm_state is offloadable by this device.
 * Rejects anything outside the supported envelope (AES-GCM-ICV16 only,
 * ESP only, IPv4/6, ESN/tunnel/encap only with matching device caps,
 * supported replay windows, packet limits without byte limits, etc.),
 * reporting the reason through extack. Returns 0 on success, negative
 * errno otherwise — TODO confirm; the return statements and closing
 * braces are among the lines missing from this excerpt.
 */
367 static int mlx5e_xfrm_validate_state(struct mlx5_core_dev *mdev,
368 struct xfrm_state *x,
369 struct netlink_ext_ack *extack)
371 if (x->props.aalgo != SADB_AALG_NONE) {
372 NL_SET_ERR_MSG_MOD(extack, "Cannot offload authenticated xfrm states");
375 if (x->props.ealgo != SADB_X_EALG_AES_GCM_ICV16) {
376 NL_SET_ERR_MSG_MOD(extack, "Only AES-GCM-ICV16 xfrm state may be offloaded");
379 if (x->props.calgo != SADB_X_CALG_NONE) {
380 NL_SET_ERR_MSG_MOD(extack, "Cannot offload compressed xfrm states");
383 if (x->props.flags & XFRM_STATE_ESN &&
384 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESN)) {
385 NL_SET_ERR_MSG_MOD(extack, "Cannot offload ESN xfrm states");
388 if (x->props.family != AF_INET &&
389 x->props.family != AF_INET6) {
390 NL_SET_ERR_MSG_MOD(extack, "Only IPv4/6 xfrm states may be offloaded");
393 if (x->id.proto != IPPROTO_ESP) {
394 NL_SET_ERR_MSG_MOD(extack, "Only ESP xfrm state may be offloaded");
/* Encapsulation (ESP-in-UDP) checks — presumably guarded by x->encap. */
398 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_ESPINUDP)) {
399 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is not supported");
403 if (x->encap->encap_type != UDP_ENCAP_ESPINUDP) {
404 NL_SET_ERR_MSG_MOD(extack, "Encapsulation other than UDP is not supported");
408 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET) {
409 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in packet offload mode only");
413 if (x->props.mode != XFRM_MODE_TRANSPORT) {
414 NL_SET_ERR_MSG_MOD(extack, "Encapsulation is supported in transport mode only");
419 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without aead");
422 if (x->aead->alg_icv_len != 128) {
423 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD ICV length other than 128bit");
/* Key blob is key bits + 32-bit salt, so 128/256-bit keys only. */
426 if ((x->aead->alg_key_len != 128 + 32) &&
427 (x->aead->alg_key_len != 256 + 32)) {
428 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with AEAD key length other than 128/256 bit");
432 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with tfc padding");
436 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states without geniv");
439 if (strcmp(x->geniv, "seqiv")) {
440 NL_SET_ERR_MSG_MOD(extack, "Cannot offload xfrm states with geniv other than seqiv");
444 if (x->sel.proto != IPPROTO_IP && x->sel.proto != IPPROTO_UDP &&
445 x->sel.proto != IPPROTO_TCP) {
446 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
450 if (x->props.mode != XFRM_MODE_TRANSPORT && x->props.mode != XFRM_MODE_TUNNEL) {
451 NL_SET_ERR_MSG_MOD(extack, "Only transport and tunnel xfrm states may be offloaded");
/* Per-offload-type capability checks. */
455 switch (x->xso.type) {
456 case XFRM_DEV_OFFLOAD_CRYPTO:
457 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_CRYPTO)) {
458 NL_SET_ERR_MSG_MOD(extack, "Crypto offload is not supported");
463 case XFRM_DEV_OFFLOAD_PACKET:
464 if (!(mlx5_ipsec_device_caps(mdev) &
465 MLX5_IPSEC_CAP_PACKET_OFFLOAD)) {
466 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported");
470 if (x->props.mode == XFRM_MODE_TUNNEL &&
471 !(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_TUNNEL)) {
472 NL_SET_ERR_MSG_MOD(extack, "Packet offload is not supported for tunnel mode");
476 if (x->replay_esn && x->replay_esn->replay_window != 32 &&
477 x->replay_esn->replay_window != 64 &&
478 x->replay_esn->replay_window != 128 &&
479 x->replay_esn->replay_window != 256) {
480 NL_SET_ERR_MSG_MOD(extack, "Unsupported replay window size");
484 if (!x->props.reqid) {
485 NL_SET_ERR_MSG_MOD(extack, "Cannot offload without reqid");
489 if (x->lft.hard_byte_limit != XFRM_INF ||
490 x->lft.soft_byte_limit != XFRM_INF) {
491 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support limits in bytes");
495 if (x->lft.soft_packet_limit >= x->lft.hard_packet_limit &&
496 x->lft.hard_packet_limit != XFRM_INF) {
497 /* XFRM stack doesn't prevent such configuration :(. */
498 NL_SET_ERR_MSG_MOD(extack, "Hard packet limit must be greater than soft one");
502 if (!x->lft.soft_packet_limit || !x->lft.hard_packet_limit) {
503 NL_SET_ERR_MSG_MOD(extack, "Soft/hard packet limits can't be 0");
508 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
/* Work handler: push shadow-SA attributes (rebuilt during an ESN advance)
 * to the device via mlx5_accel_esp_modify_xfrm(). work->data holds a
 * shadow mlx5e_ipsec_sa_entry whose attrs were refreshed by
 * mlx5e_xfrm_advance_esn_state().
 */
514 static void mlx5e_ipsec_modify_state(struct work_struct *_work)
516 struct mlx5e_ipsec_work *work =
517 container_of(_work, struct mlx5e_ipsec_work, work);
518 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
519 struct mlx5_accel_esp_xfrm_attrs *attrs;
521 attrs = &((struct mlx5e_ipsec_sa_entry *)work->data)->attrs;
523 mlx5_accel_esp_modify_xfrm(sa_entry, attrs);
/* Select the TX IV-construction callback for crypto-offload states:
 * the ESN-aware variant when XFRM_STATE_ESN is set, the plain one
 * otherwise. Only relevant for CRYPTO-type, outbound offloads.
 */
526 static void mlx5e_ipsec_set_esn_ops(struct mlx5e_ipsec_sa_entry *sa_entry)
528 struct xfrm_state *x = sa_entry->x;
530 if (x->xso.type != XFRM_DEV_OFFLOAD_CRYPTO ||
531 x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
534 if (x->props.flags & XFRM_STATE_ESN) {
535 sa_entry->set_iv_op = mlx5e_ipsec_set_iv_esn;
539 sa_entry->set_iv_op = mlx5e_ipsec_set_iv;
/* Work handler: a neighbour update changed the tunnel peer's MAC.
 * Copy the snapshotted address (taken in mlx5e_ipsec_netevent_event())
 * into the SA attrs — smac for RX, dmac for TX — and reprogram the
 * flow-steering rules.
 */
542 static void mlx5e_ipsec_handle_netdev_event(struct work_struct *_work)
544 struct mlx5e_ipsec_work *work =
545 container_of(_work, struct mlx5e_ipsec_work, work);
546 struct mlx5e_ipsec_sa_entry *sa_entry = work->sa_entry;
547 struct mlx5e_ipsec_netevent_data *data = work->data;
548 struct mlx5_accel_esp_xfrm_attrs *attrs;
550 attrs = &sa_entry->attrs;
552 switch (attrs->dir) {
553 case XFRM_DEV_OFFLOAD_IN:
554 ether_addr_copy(attrs->smac, data->addr);
556 case XFRM_DEV_OFFLOAD_OUT:
557 ether_addr_copy(attrs->dmac, data->addr);
563 mlx5e_accel_ipsec_fs_modify(sa_entry);
/* Allocate the deferred-work context an SA may need:
 *  - crypto offload + ESN: a shadow SA entry driving modify_state work;
 *  - packet offload + tunnel: a netevent data buffer driving MAC-update
 *    work.
 * SAs needing neither get no work item (the early-return lines are
 * missing from this excerpt). Returns 0 or -ENOMEM — TODO confirm.
 */
566 static int mlx5_ipsec_create_work(struct mlx5e_ipsec_sa_entry *sa_entry)
568 struct xfrm_state *x = sa_entry->x;
569 struct mlx5e_ipsec_work *work;
572 switch (x->xso.type) {
573 case XFRM_DEV_OFFLOAD_CRYPTO:
574 if (!(x->props.flags & XFRM_STATE_ESN))
577 case XFRM_DEV_OFFLOAD_PACKET:
578 if (x->props.mode != XFRM_MODE_TUNNEL)
585 work = kzalloc(sizeof(*work), GFP_KERNEL);
589 switch (x->xso.type) {
590 case XFRM_DEV_OFFLOAD_CRYPTO:
/* Shadow SA entry — sized as a full sa_entry so attrs can be rebuilt. */
591 data = kzalloc(sizeof(*sa_entry), GFP_KERNEL);
595 INIT_WORK(&work->work, mlx5e_ipsec_modify_state);
597 case XFRM_DEV_OFFLOAD_PACKET:
598 data = kzalloc(sizeof(struct mlx5e_ipsec_netevent_data),
603 INIT_WORK(&work->work, mlx5e_ipsec_handle_netdev_event);
610 work->sa_entry = sa_entry;
611 sa_entry->work = work;
/* Allocate the delayed work that polls TX packet-limit expiry.
 * Only needed for outbound packet-offload SAs that actually have a
 * finite soft or hard packet limit; all other cases return early
 * (return lines missing from this excerpt).
 */
619 static int mlx5e_ipsec_create_dwork(struct mlx5e_ipsec_sa_entry *sa_entry)
621 struct xfrm_state *x = sa_entry->x;
622 struct mlx5e_ipsec_dwork *dwork;
624 if (x->xso.type != XFRM_DEV_OFFLOAD_PACKET)
627 if (x->xso.dir != XFRM_DEV_OFFLOAD_OUT)
630 if (x->lft.soft_packet_limit == XFRM_INF &&
631 x->lft.hard_packet_limit == XFRM_INF)
634 dwork = kzalloc(sizeof(*dwork), GFP_KERNEL);
638 dwork->sa_entry = sa_entry;
639 INIT_DELAYED_WORK(&dwork->dwork, mlx5e_ipsec_handle_tx_limit);
640 sa_entry->dwork = dwork;
/* xdo_dev_state_add callback: offload a new xfrm_state to hardware.
 * Sequence: allocate sa_entry (GFP_ATOMIC for acquire-flow temporary
 * states) -> validate -> block eswitch mode changes -> snapshot ESN ->
 * build attrs -> allocate work/dwork -> create HW SA context -> install
 * steering rules -> register in the sadb xarray -> arm limit polling and
 * tunnel marking -> publish the handle. The trailing statements are the
 * unwind path (labels are among the missing lines in this excerpt).
 */
644 static int mlx5e_xfrm_add_state(struct xfrm_state *x,
645 struct netlink_ext_ack *extack)
647 struct mlx5e_ipsec_sa_entry *sa_entry = NULL;
648 struct net_device *netdev = x->xso.real_dev;
649 struct mlx5e_ipsec *ipsec;
650 struct mlx5e_priv *priv;
654 priv = netdev_priv(netdev);
/* Acquire-flow states can be added from atomic context. */
659 gfp = (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ) ? GFP_ATOMIC : GFP_KERNEL;
660 sa_entry = kzalloc(sizeof(*sa_entry), gfp);
665 sa_entry->ipsec = ipsec;
666 /* Check if this SA is originated from acquire flow temporary SA */
667 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
670 err = mlx5e_xfrm_validate_state(priv->mdev, x, extack);
674 if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
680 if (x->props.flags & XFRM_STATE_ESN)
681 mlx5e_ipsec_update_esn_state(sa_entry);
683 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry->attrs);
685 err = mlx5_ipsec_create_work(sa_entry);
689 err = mlx5e_ipsec_create_dwork(sa_entry);
693 /* create hw context */
694 err = mlx5_ipsec_create_sa_ctx(sa_entry);
698 err = mlx5e_accel_ipsec_fs_add_rule(sa_entry);
702 if (x->props.mode == XFRM_MODE_TUNNEL &&
703 x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
704 !mlx5e_ipsec_fs_tunnel_enabled(sa_entry)) {
705 NL_SET_ERR_MSG_MOD(extack, "Packet offload tunnel mode is disabled due to encap settings");
710 /* We use *_bh() variant because xfrm_timer_handler(), which runs
711 * in softirq context, can reach our state delete logic and we need
712 * xa_erase_bh() there.
714 err = xa_insert_bh(&ipsec->sadb, sa_entry->ipsec_obj_id, sa_entry,
719 mlx5e_ipsec_set_esn_ops(sa_entry);
722 queue_delayed_work(ipsec->wq, &sa_entry->dwork->dwork,
/* Tunnel SAs are marked so the netevent notifier can find them. */
725 if (x->xso.type == XFRM_DEV_OFFLOAD_PACKET &&
726 x->props.mode == XFRM_MODE_TUNNEL)
727 xa_set_mark(&ipsec->sadb, sa_entry->ipsec_obj_id,
728 MLX5E_IPSEC_TUNNEL_SA);
731 x->xso.offload_handle = (unsigned long)sa_entry;
/* Error unwind (labels not visible in this excerpt): tear down in
 * reverse order of construction.
 */
735 mlx5e_accel_ipsec_fs_del_rule(sa_entry);
737 mlx5_ipsec_free_sa_ctx(sa_entry);
739 kfree(sa_entry->dwork);
742 kfree(sa_entry->work->data);
743 kfree(sa_entry->work);
745 mlx5_eswitch_unblock_ipsec(priv->mdev);
748 NL_SET_ERR_MSG_WEAK_MOD(extack, "Device failed to offload this state");
/* xdo_dev_state_delete callback: unlink the SA from the sadb xarray.
 * Uses xa_erase_bh() because this can run from the xfrm timer in softirq
 * context. For tunnel packet-offload SAs, flush the workqueue so no ARP/
 * netevent work still references the entry. Acquire-flow temporaries were
 * never inserted and are skipped (early return line missing here).
 */
752 static void mlx5e_xfrm_del_state(struct xfrm_state *x)
754 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
755 struct mlx5_accel_esp_xfrm_attrs *attrs = &sa_entry->attrs;
756 struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
757 struct mlx5e_ipsec_sa_entry *old;
759 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
762 old = xa_erase_bh(&ipsec->sadb, sa_entry->ipsec_obj_id);
763 WARN_ON(old != sa_entry);
765 if (attrs->mode == XFRM_MODE_TUNNEL &&
766 attrs->type == XFRM_DEV_OFFLOAD_PACKET)
767 /* Make sure that no ARP requests are running in parallel */
768 flush_workqueue(ipsec->wq);
/* xdo_dev_state_free callback: release everything add_state built.
 * Cancels pending (delayed) work, removes steering rules, frees the HW
 * SA context and driver allocations, then unblocks eswitch mode changes.
 * Acquire-flow temporaries skip the HW teardown (guard lines around the
 * cancel calls are missing from this excerpt).
 */
772 static void mlx5e_xfrm_free_state(struct xfrm_state *x)
774 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
775 struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
777 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
781 cancel_work_sync(&sa_entry->work->work);
784 cancel_delayed_work_sync(&sa_entry->dwork->dwork);
786 mlx5e_accel_ipsec_fs_del_rule(sa_entry);
787 mlx5_ipsec_free_sa_ctx(sa_entry);
788 kfree(sa_entry->dwork);
790 kfree(sa_entry->work->data);
791 kfree(sa_entry->work);
792 mlx5_eswitch_unblock_ipsec(ipsec->mdev);
/* Netevent notifier: react to neighbour (ARP) updates for tunnel SAs.
 * Walks every sadb entry marked MLX5E_IPSEC_TUNNEL_SA, matches the
 * neighbour key against the SA's tunnel addresses (32-bit compare for
 * IPv4, 128-bit otherwise), snapshots the new hardware address into the
 * SA's work data and queues the MAC-update work. Only NUD_VALID
 * NEIGH_UPDATE events are considered.
 */
797 static int mlx5e_ipsec_netevent_event(struct notifier_block *nb,
798 unsigned long event, void *ptr)
800 struct mlx5_accel_esp_xfrm_attrs *attrs;
801 struct mlx5e_ipsec_netevent_data *data;
802 struct mlx5e_ipsec_sa_entry *sa_entry;
803 struct mlx5e_ipsec *ipsec;
804 struct neighbour *n = ptr;
805 struct net_device *netdev;
806 struct xfrm_state *x;
809 if (event != NETEVENT_NEIGH_UPDATE || !(n->nud_state & NUD_VALID))
812 ipsec = container_of(nb, struct mlx5e_ipsec, netevent_nb);
813 xa_for_each_marked(&ipsec->sadb, idx, sa_entry, MLX5E_IPSEC_TUNNEL_SA) {
814 attrs = &sa_entry->attrs;
816 if (attrs->family == AF_INET) {
817 if (!neigh_key_eq32(n, &attrs->saddr.a4) &&
818 !neigh_key_eq32(n, &attrs->daddr.a4))
/* IPv6 path: full 128-bit key comparison on the same union. */
821 if (!neigh_key_eq128(n, &attrs->saddr.a4) &&
822 !neigh_key_eq128(n, &attrs->daddr.a4))
827 netdev = x->xso.real_dev;
828 data = sa_entry->work->data;
830 neigh_ha_snapshot(data->addr, n, netdev);
831 queue_work(ipsec->wq, &sa_entry->work->work);
/* Attach IPsec offload support to a netdevice at probe time.
 * Allocates the mlx5e_ipsec context, its unbound workqueue and the sadb
 * xarray; conditionally initializes the ASO (packet offload caps) and
 * registers the netevent notifier (tunnel caps); finally sets up the
 * flow-steering infrastructure. On any failure the trailing statements
 * unwind in reverse order (error labels are missing from this excerpt)
 * and the device simply runs without IPsec offload.
 */
837 void mlx5e_ipsec_init(struct mlx5e_priv *priv)
839 struct mlx5e_ipsec *ipsec;
842 if (!mlx5_ipsec_device_caps(priv->mdev)) {
843 netdev_dbg(priv->netdev, "Not an IPSec offload device\n");
847 ipsec = kzalloc(sizeof(*ipsec), GFP_KERNEL);
851 xa_init_flags(&ipsec->sadb, XA_FLAGS_ALLOC);
852 ipsec->mdev = priv->mdev;
853 ipsec->wq = alloc_workqueue("mlx5e_ipsec: %s", WQ_UNBOUND, 0,
858 if (mlx5_ipsec_device_caps(priv->mdev) &
859 MLX5_IPSEC_CAP_PACKET_OFFLOAD) {
860 ret = mlx5e_ipsec_aso_init(ipsec);
865 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL) {
866 ipsec->netevent_nb.notifier_call = mlx5e_ipsec_netevent_event;
867 ret = register_netevent_notifier(&ipsec->netevent_nb);
872 ipsec->is_uplink_rep = mlx5e_is_uplink_rep(priv);
873 ret = mlx5e_accel_ipsec_fs_init(ipsec);
877 ipsec->fs = priv->fs;
879 netdev_dbg(priv->netdev, "IPSec attached to netdevice\n");
/* Error unwind (labels not visible): undo in reverse order. */
883 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
884 unregister_netevent_notifier(&ipsec->netevent_nb);
886 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
887 mlx5e_ipsec_aso_cleanup(ipsec);
889 destroy_workqueue(ipsec->wq);
892 mlx5_core_err(priv->mdev, "IPSec initialization failed, %d\n", ret);
/* Detach IPsec offload from the netdevice — mirror of mlx5e_ipsec_init():
 * tear down flow steering, unregister the netevent notifier and ASO if
 * they were set up, then destroy the workqueue. The NULL check on
 * priv->ipsec and the final kfree are among the missing lines here.
 */
896 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv)
898 struct mlx5e_ipsec *ipsec = priv->ipsec;
903 mlx5e_accel_ipsec_fs_cleanup(ipsec);
904 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_TUNNEL)
905 unregister_netevent_notifier(&ipsec->netevent_nb);
906 if (mlx5_ipsec_device_caps(priv->mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
907 mlx5e_ipsec_aso_cleanup(ipsec);
908 destroy_workqueue(ipsec->wq);
/* xdo_dev_offload_ok callback: per-skb check whether this packet can use
 * crypto offload. Rejects IPv4 packets carrying IP options (ihl > 5) and
 * IPv6 packets with extension headers; the affirmative return is among
 * the missing lines in this excerpt.
 */
913 static bool mlx5e_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
915 if (x->props.family == AF_INET) {
916 /* Offload with IPv4 options is not supported yet */
917 if (ip_hdr(skb)->ihl > 5)
920 /* Offload with IPv6 extension headers is not support yet */
921 if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
/* xdo_dev_state_advance_esn callback: the XFRM core advanced the ESN.
 * Re-snapshot the ESN state; if hardware needs updating, rebuild the
 * attrs into the shadow SA entry (work->data) and queue the modify work
 * so the device context is updated outside this (possibly atomic) path.
 */
928 static void mlx5e_xfrm_advance_esn_state(struct xfrm_state *x)
930 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
931 struct mlx5e_ipsec_work *work = sa_entry->work;
932 struct mlx5e_ipsec_sa_entry *sa_entry_shadow;
935 need_update = mlx5e_ipsec_update_esn_state(sa_entry);
939 sa_entry_shadow = work->data;
940 memset(sa_entry_shadow, 0x00, sizeof(*sa_entry_shadow));
941 mlx5e_ipsec_build_accel_xfrm_attrs(sa_entry, &sa_entry_shadow->attrs);
942 queue_work(sa_entry->ipsec->wq, &work->work);
/* xdo_dev_state_update_curlft callback: fold the cached HW flow-counter
 * values into the state's current lifetime accounting. Caller must hold
 * either the state lock or the xfrm_cfg mutex (asserted below).
 * Acquire-flow temporaries have no counter and are skipped.
 */
945 static void mlx5e_xfrm_update_curlft(struct xfrm_state *x)
947 struct mlx5e_ipsec_sa_entry *sa_entry = to_ipsec_sa_entry(x);
948 struct mlx5e_ipsec_rule *ipsec_rule = &sa_entry->ipsec_rule;
949 u64 packets, bytes, lastuse;
951 lockdep_assert(lockdep_is_held(&x->lock) ||
952 lockdep_is_held(&dev_net(x->xso.real_dev)->xfrm.xfrm_cfg_mutex));
954 if (x->xso.flags & XFRM_DEV_OFFLOAD_FLAG_ACQ)
957 mlx5_fc_query_cached(ipsec_rule->fc, &bytes, &packets, &lastuse);
958 x->curlft.packets += packets;
959 x->curlft.bytes += bytes;
/* Validate that an xfrm_policy is offloadable: main policy type only,
 * at most one template, in/out direction only, packet offload type,
 * TCP/UDP/any upper protocol, and priority support gated on device caps.
 * Reasons are reported via extack; return statements are among the lines
 * missing from this excerpt.
 */
962 static int mlx5e_xfrm_validate_policy(struct mlx5_core_dev *mdev,
963 struct xfrm_policy *x,
964 struct netlink_ext_ack *extack)
966 struct xfrm_selector *sel = &x->selector;
968 if (x->type != XFRM_POLICY_TYPE_MAIN) {
969 NL_SET_ERR_MSG_MOD(extack, "Cannot offload non-main policy types");
973 /* Please pay attention that we support only one template */
974 if (x->xfrm_nr > 1) {
975 NL_SET_ERR_MSG_MOD(extack, "Cannot offload more than one template");
979 if (x->xdo.dir != XFRM_DEV_OFFLOAD_IN &&
980 x->xdo.dir != XFRM_DEV_OFFLOAD_OUT) {
981 NL_SET_ERR_MSG_MOD(extack, "Cannot offload forward policy");
/* A policy with reqid 0 and a fully-wildcard selector cannot be
 * distinguished in hardware.
 */
985 if (!x->xfrm_vec[0].reqid && sel->proto == IPPROTO_IP &&
986 addr6_all_zero(sel->saddr.a6) && addr6_all_zero(sel->daddr.a6)) {
987 NL_SET_ERR_MSG_MOD(extack, "Unsupported policy with reqid 0 without at least one of upper protocol or ip addr(s) different than 0");
991 if (x->xdo.type != XFRM_DEV_OFFLOAD_PACKET) {
992 NL_SET_ERR_MSG_MOD(extack, "Unsupported xfrm offload type");
996 if (x->selector.proto != IPPROTO_IP &&
997 x->selector.proto != IPPROTO_UDP &&
998 x->selector.proto != IPPROTO_TCP) {
999 NL_SET_ERR_MSG_MOD(extack, "Device does not support upper protocol other than TCP/UDP");
/* Priority checks — presumably guarded by x->priority != 0. */
1004 if (!(mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PRIO)) {
1005 NL_SET_ERR_MSG_MOD(extack, "Device does not support policy priority");
1009 if (x->priority == U32_MAX) {
1010 NL_SET_ERR_MSG_MOD(extack, "Device does not support requested policy priority");
/* Flatten an xfrm_policy into the driver's mlx5_accel_pol_xfrm_attrs:
 * selector addresses/ports/proto, direction, action, reqid of the single
 * supported template, and priority. The `sel = &x->selector` assignment
 * is among the lines missing from this excerpt.
 */
1019 mlx5e_ipsec_build_accel_pol_attrs(struct mlx5e_ipsec_pol_entry *pol_entry,
1020 struct mlx5_accel_pol_xfrm_attrs *attrs)
1022 struct xfrm_policy *x = pol_entry->x;
1023 struct xfrm_selector *sel;
1026 memset(attrs, 0, sizeof(*attrs));
1028 memcpy(&attrs->saddr, sel->saddr.a6, sizeof(attrs->saddr));
1029 memcpy(&attrs->daddr, sel->daddr.a6, sizeof(attrs->daddr));
1030 attrs->family = sel->family;
1031 attrs->dir = x->xdo.dir;
1032 attrs->action = x->action;
1033 attrs->type = XFRM_DEV_OFFLOAD_PACKET;
1034 attrs->reqid = x->xfrm_vec[0].reqid;
/* Ports converted from network to host byte order for the FS layer. */
1035 attrs->upspec.dport = ntohs(sel->dport);
1036 attrs->upspec.dport_mask = ntohs(sel->dport_mask);
1037 attrs->upspec.sport = ntohs(sel->sport);
1038 attrs->upspec.sport_mask = ntohs(sel->sport_mask);
1039 attrs->upspec.proto = sel->proto;
1040 attrs->prio = x->priority;
/* xdo_dev_policy_add callback: offload an xfrm_policy.
 * Validates the policy, allocates the pol_entry, blocks eswitch mode
 * changes, builds the attrs, installs the steering rules, and publishes
 * the handle. Trailing statements are the error unwind (labels missing
 * from this excerpt).
 */
1043 static int mlx5e_xfrm_add_policy(struct xfrm_policy *x,
1044 struct netlink_ext_ack *extack)
1046 struct net_device *netdev = x->xdo.real_dev;
1047 struct mlx5e_ipsec_pol_entry *pol_entry;
1048 struct mlx5e_priv *priv;
1051 priv = netdev_priv(netdev);
1053 NL_SET_ERR_MSG_MOD(extack, "Device doesn't support IPsec packet offload");
1057 err = mlx5e_xfrm_validate_policy(priv->mdev, x, extack);
1061 pol_entry = kzalloc(sizeof(*pol_entry), GFP_KERNEL);
1066 pol_entry->ipsec = priv->ipsec;
1068 if (!mlx5_eswitch_block_ipsec(priv->mdev)) {
1073 mlx5e_ipsec_build_accel_pol_attrs(pol_entry, &pol_entry->attrs);
1074 err = mlx5e_accel_ipsec_fs_add_pol(pol_entry);
1078 x->xdo.offload_handle = (unsigned long)pol_entry;
/* Error unwind (labels not visible in this excerpt). */
1082 mlx5_eswitch_unblock_ipsec(priv->mdev);
1085 NL_SET_ERR_MSG_MOD(extack, "Device failed to offload this policy");
/* xdo_dev_policy_delete callback: remove the policy's steering rules and
 * release the eswitch block taken in mlx5e_xfrm_add_policy().
 */
1089 static void mlx5e_xfrm_del_policy(struct xfrm_policy *x)
1091 struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
1093 mlx5e_accel_ipsec_fs_del_pol(pol_entry);
1094 mlx5_eswitch_unblock_ipsec(pol_entry->ipsec->mdev);
/* xdo_dev_policy_free callback: free the driver's policy entry (the
 * kfree line is among those missing from this excerpt).
 */
1097 static void mlx5e_xfrm_free_policy(struct xfrm_policy *x)
1099 struct mlx5e_ipsec_pol_entry *pol_entry = to_ipsec_pol_entry(x);
/* xfrmdev_ops for crypto-only offload devices: state add/delete/free,
 * per-skb offload check, and ESN advance — no policy or lifetime ops.
 */
1104 static const struct xfrmdev_ops mlx5e_ipsec_xfrmdev_ops = {
1105 .xdo_dev_state_add = mlx5e_xfrm_add_state,
1106 .xdo_dev_state_delete = mlx5e_xfrm_del_state,
1107 .xdo_dev_state_free = mlx5e_xfrm_free_state,
1108 .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
1109 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
/* xfrmdev_ops for full packet-offload devices: superset of the crypto
 * ops, adding lifetime-counter updates and policy add/delete/free.
 */
1112 static const struct xfrmdev_ops mlx5e_ipsec_packet_xfrmdev_ops = {
1113 .xdo_dev_state_add = mlx5e_xfrm_add_state,
1114 .xdo_dev_state_delete = mlx5e_xfrm_del_state,
1115 .xdo_dev_state_free = mlx5e_xfrm_free_state,
1116 .xdo_dev_offload_ok = mlx5e_ipsec_offload_ok,
1117 .xdo_dev_state_advance_esn = mlx5e_xfrm_advance_esn_state,
1119 .xdo_dev_state_update_curlft = mlx5e_xfrm_update_curlft,
1120 .xdo_dev_policy_add = mlx5e_xfrm_add_policy,
1121 .xdo_dev_policy_delete = mlx5e_xfrm_del_policy,
1122 .xdo_dev_policy_free = mlx5e_xfrm_free_policy,
1125 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
1127 struct mlx5_core_dev *mdev = priv->mdev;
1128 struct net_device *netdev = priv->netdev;
1130 if (!mlx5_ipsec_device_caps(mdev))
1133 mlx5_core_info(mdev, "mlx5e: IPSec ESP acceleration enabled\n");
1135 if (mlx5_ipsec_device_caps(mdev) & MLX5_IPSEC_CAP_PACKET_OFFLOAD)
1136 netdev->xfrmdev_ops = &mlx5e_ipsec_packet_xfrmdev_ops;
1138 netdev->xfrmdev_ops = &mlx5e_ipsec_xfrmdev_ops;
1140 netdev->features |= NETIF_F_HW_ESP;
1141 netdev->hw_enc_features |= NETIF_F_HW_ESP;
1143 if (!MLX5_CAP_ETH(mdev, swp_csum)) {
1144 mlx5_core_dbg(mdev, "mlx5e: SWP checksum not supported\n");
1148 netdev->features |= NETIF_F_HW_ESP_TX_CSUM;
1149 netdev->hw_enc_features |= NETIF_F_HW_ESP_TX_CSUM;
1151 if (!MLX5_CAP_ETH(mdev, swp_lso)) {
1152 mlx5_core_dbg(mdev, "mlx5e: ESP LSO not supported\n");
1156 netdev->gso_partial_features |= NETIF_F_GSO_ESP;
1157 mlx5_core_dbg(mdev, "mlx5e: ESP GSO capability turned on\n");
1158 netdev->features |= NETIF_F_GSO_ESP;
1159 netdev->hw_features |= NETIF_F_GSO_ESP;
1160 netdev->hw_enc_features |= NETIF_F_GSO_ESP;