Correct .gbs.conf settings
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / xfrm / xfrm_policy.c
1 /*
2  * xfrm_policy.c
3  *
4  * Changes:
5  *      Mitsuru KANDA @USAGI
6  *      Kazunori MIYAZAWA @USAGI
7  *      Kunihiro Ishiguro <kunihiro@ipinfusion.com>
8  *              IPv6 support
9  *      Kazunori MIYAZAWA @USAGI
10  *      YOSHIFUJI Hideaki
11  *              Split up af-specific portion
12  *      Derek Atkins <derek@ihtfp.com>          Add the post_input processor
13  *
14  */
15
16 #include <linux/err.h>
17 #include <linux/slab.h>
18 #include <linux/kmod.h>
19 #include <linux/list.h>
20 #include <linux/spinlock.h>
21 #include <linux/workqueue.h>
22 #include <linux/notifier.h>
23 #include <linux/netdevice.h>
24 #include <linux/netfilter.h>
25 #include <linux/module.h>
26 #include <linux/cache.h>
27 #include <linux/audit.h>
28 #include <net/dst.h>
29 #include <net/flow.h>
30 #include <net/xfrm.h>
31 #include <net/ip.h>
32 #ifdef CONFIG_XFRM_STATISTICS
33 #include <net/snmp.h>
34 #endif
35
36 #include "xfrm_hash.h"
37
/* Bounds for the policy hold-queue retry timer; presumably consumed by
 * xfrm_policy_queue_process() (not visible in this chunk) — confirm there. */
#define XFRM_QUEUE_TMO_MIN ((unsigned)(HZ/10))
#define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ))
/* Hard cap on skbs parked on a single policy's hold queue (used by the
 * queueing path outside this chunk). */
#define XFRM_MAX_QUEUE_LEN	100

/* Chain of per-socket policy bundles; manipulation sites are outside this
 * chunk — NOTE(review): confirm locking discipline at the users. */
static struct dst_entry *xfrm_policy_sk_bundles;

/* Argument bundle threaded through the flow-cache bundle resolver:
 * the original route plus resolver flags. */
struct xfrm_flo {
	struct dst_entry *dst_orig;
	u8 flags;
};
48
/* Per-family policy ops: writers serialize on xfrm_policy_afinfo_lock,
 * readers use RCU (see xfrm_policy_get_afinfo()). */
static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock);
static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO]
						__read_mostly;

/* Slab cache for xfrm_dst bundle entries. */
static struct kmem_cache *xfrm_dst_cache __read_mostly;

/* Forward declarations for helpers defined later in this file. */
static void xfrm_init_pmtu(struct dst_entry *dst);
static int stale_bundle(struct dst_entry *dst);
static int xfrm_bundle_ok(struct xfrm_dst *xdst);
static void xfrm_policy_queue_process(unsigned long arg);

static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir);
62
63 static inline bool
64 __xfrm4_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
65 {
66         const struct flowi4 *fl4 = &fl->u.ip4;
67
68         return  addr4_match(fl4->daddr, sel->daddr.a4, sel->prefixlen_d) &&
69                 addr4_match(fl4->saddr, sel->saddr.a4, sel->prefixlen_s) &&
70                 !((xfrm_flowi_dport(fl, &fl4->uli) ^ sel->dport) & sel->dport_mask) &&
71                 !((xfrm_flowi_sport(fl, &fl4->uli) ^ sel->sport) & sel->sport_mask) &&
72                 (fl4->flowi4_proto == sel->proto || !sel->proto) &&
73                 (fl4->flowi4_oif == sel->ifindex || !sel->ifindex);
74 }
75
76 static inline bool
77 __xfrm6_selector_match(const struct xfrm_selector *sel, const struct flowi *fl)
78 {
79         const struct flowi6 *fl6 = &fl->u.ip6;
80
81         return  addr_match(&fl6->daddr, &sel->daddr, sel->prefixlen_d) &&
82                 addr_match(&fl6->saddr, &sel->saddr, sel->prefixlen_s) &&
83                 !((xfrm_flowi_dport(fl, &fl6->uli) ^ sel->dport) & sel->dport_mask) &&
84                 !((xfrm_flowi_sport(fl, &fl6->uli) ^ sel->sport) & sel->sport_mask) &&
85                 (fl6->flowi6_proto == sel->proto || !sel->proto) &&
86                 (fl6->flowi6_oif == sel->ifindex || !sel->ifindex);
87 }
88
89 bool xfrm_selector_match(const struct xfrm_selector *sel, const struct flowi *fl,
90                          unsigned short family)
91 {
92         switch (family) {
93         case AF_INET:
94                 return __xfrm4_selector_match(sel, fl);
95         case AF_INET6:
96                 return __xfrm6_selector_match(sel, fl);
97         }
98         return false;
99 }
100
/* Look up the registered afinfo ops for @family.
 *
 * On success the RCU read lock is left HELD and the caller must release
 * it with xfrm_policy_put_afinfo().  On failure (family out of range or
 * unregistered) NULL is returned with the RCU lock already dropped.
 */
static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family)
{
	struct xfrm_policy_afinfo *afinfo;

	if (unlikely(family >= NPROTO))
		return NULL;
	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[family]);
	if (unlikely(!afinfo))
		rcu_read_unlock();
	return afinfo;
}
113
/* Pairs with a successful xfrm_policy_get_afinfo(): ends the RCU read
 * section protecting the afinfo pointer.  @afinfo itself is unused. */
static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	rcu_read_unlock();
}
118
/* Route lookup through the address family's dst_lookup callback.
 *
 * Returns the dst_entry from the afinfo callback, or
 * ERR_PTR(-EAFNOSUPPORT) when no afinfo is registered for @family.
 */
static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos,
						  const xfrm_address_t *saddr,
						  const xfrm_address_t *daddr,
						  int family)
{
	struct xfrm_policy_afinfo *afinfo;
	struct dst_entry *dst;

	afinfo = xfrm_policy_get_afinfo(family);
	if (unlikely(afinfo == NULL))
		return ERR_PTR(-EAFNOSUPPORT);

	dst = afinfo->dst_lookup(net, tos, saddr, daddr);

	/* Drops the RCU read lock taken by xfrm_policy_get_afinfo(). */
	xfrm_policy_put_afinfo(afinfo);

	return dst;
}
137
/* Route lookup for one state in a bundle.
 *
 * Normally routes props.saddr -> id.daddr, but states with a co-address
 * (x->coaddr, e.g. Mobile IPv6 route optimization — confirm against the
 * type implementations) substitute it on the local or remote side.  On
 * success the addresses actually used are copied back into
 * @prev_saddr/@prev_daddr so the next hop of the bundle chains from them.
 */
static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos,
						xfrm_address_t *prev_saddr,
						xfrm_address_t *prev_daddr,
						int family)
{
	struct net *net = xs_net(x);
	xfrm_address_t *saddr = &x->props.saddr;
	xfrm_address_t *daddr = &x->id.daddr;
	struct dst_entry *dst;

	if (x->type->flags & XFRM_TYPE_LOCAL_COADDR) {
		saddr = x->coaddr;
		daddr = prev_daddr;
	}
	if (x->type->flags & XFRM_TYPE_REMOTE_COADDR) {
		saddr = prev_saddr;
		daddr = x->coaddr;
	}

	dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family);

	if (!IS_ERR(dst)) {
		/* Propagate the effective endpoints to the caller. */
		if (prev_saddr != saddr)
			memcpy(prev_saddr, saddr,  sizeof(*prev_saddr));
		if (prev_daddr != daddr)
			memcpy(prev_daddr, daddr,  sizeof(*prev_daddr));
	}

	return dst;
}
168
169 static inline unsigned long make_jiffies(long secs)
170 {
171         if (secs >= (MAX_SCHEDULE_TIMEOUT-1)/HZ)
172                 return MAX_SCHEDULE_TIMEOUT-1;
173         else
174                 return secs*HZ;
175 }
176
/* Per-policy lifetime timer.
 *
 * Checks the hard and soft add/use expiry deadlines: a hard expiry
 * deletes the policy, a soft expiry sends a km expire notification and
 * re-arms after XFRM_KM_TIMEOUT.  Otherwise the timer is re-armed for
 * the nearest remaining deadline.  The timer owns a policy reference,
 * dropped on every exit path; mod_timer() returning 0 means the timer
 * was idle, so a fresh reference is taken for the re-armed timer.
 */
static void xfrm_policy_timer(unsigned long data)
{
	struct xfrm_policy *xp = (struct xfrm_policy *)data;
	unsigned long now = get_seconds();
	long next = LONG_MAX;	/* seconds until the nearest deadline */
	int warn = 0;		/* set when a soft limit has expired */
	int dir;

	read_lock(&xp->lock);

	if (unlikely(xp->walk.dead))
		goto out;

	dir = xfrm_policy_id2dir(xp->index);

	if (xp->lft.hard_add_expires_seconds) {
		long tmo = xp->lft.hard_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.hard_use_expires_seconds) {
		/* Never used yet: measure from creation time instead. */
		long tmo = xp->lft.hard_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0)
			goto expired;
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_add_expires_seconds) {
		long tmo = xp->lft.soft_add_expires_seconds +
			xp->curlft.add_time - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}
	if (xp->lft.soft_use_expires_seconds) {
		long tmo = xp->lft.soft_use_expires_seconds +
			(xp->curlft.use_time ? : xp->curlft.add_time) - now;
		if (tmo <= 0) {
			warn = 1;
			tmo = XFRM_KM_TIMEOUT;
		}
		if (tmo < next)
			next = tmo;
	}

	if (warn)
		km_policy_expired(xp, dir, 0, 0);
	if (next != LONG_MAX &&
	    !mod_timer(&xp->timer, jiffies + make_jiffies(next)))
		xfrm_pol_hold(xp);

out:
	read_unlock(&xp->lock);
	xfrm_pol_put(xp);
	return;

expired:
	read_unlock(&xp->lock);
	/* Hard limit hit: delete and notify unless already gone. */
	if (!xfrm_policy_delete(xp, dir))
		km_policy_expired(xp, dir, 1, 0);
	xfrm_pol_put(xp);
}
246
247 static struct flow_cache_object *xfrm_policy_flo_get(struct flow_cache_object *flo)
248 {
249         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
250
251         if (unlikely(pol->walk.dead))
252                 flo = NULL;
253         else
254                 xfrm_pol_hold(pol);
255
256         return flo;
257 }
258
259 static int xfrm_policy_flo_check(struct flow_cache_object *flo)
260 {
261         struct xfrm_policy *pol = container_of(flo, struct xfrm_policy, flo);
262
263         return !pol->walk.dead;
264 }
265
266 static void xfrm_policy_flo_delete(struct flow_cache_object *flo)
267 {
268         xfrm_pol_put(container_of(flo, struct xfrm_policy, flo));
269 }
270
/* Flow-cache callbacks for policy objects embedded via &policy->flo. */
static const struct flow_cache_ops xfrm_policy_fc_ops = {
	.get = xfrm_policy_flo_get,
	.check = xfrm_policy_flo_check,
	.delete = xfrm_policy_flo_delete,
};
276
/* Allocate xfrm_policy. Not used here, it is supposed to be used by pfkeyv2
 * SPD calls.
 *
 * Returns a zeroed policy with refcount 1, initialized list/hash nodes,
 * lock, hold queue, and both timers set up (neither armed), or NULL on
 * allocation failure.  The caller owns the single reference.
 */

struct xfrm_policy *xfrm_policy_alloc(struct net *net, gfp_t gfp)
{
	struct xfrm_policy *policy;

	policy = kzalloc(sizeof(struct xfrm_policy), gfp);

	if (policy) {
		write_pnet(&policy->xp_net, net);
		INIT_LIST_HEAD(&policy->walk.all);
		INIT_HLIST_NODE(&policy->bydst);
		INIT_HLIST_NODE(&policy->byidx);
		rwlock_init(&policy->lock);
		atomic_set(&policy->refcnt, 1);
		skb_queue_head_init(&policy->polq.hold_queue);
		/* Lifetime timer and hold-queue timer both carry the
		 * policy pointer as their argument. */
		setup_timer(&policy->timer, xfrm_policy_timer,
				(unsigned long)policy);
		setup_timer(&policy->polq.hold_timer, xfrm_policy_queue_process,
			    (unsigned long)policy);
		policy->flo.ops = &xfrm_policy_fc_ops;
	}
	return policy;
}
EXPORT_SYMBOL(xfrm_policy_alloc);
304
/* Destroy xfrm_policy: descendant resources must be released to this moment. */

void xfrm_policy_destroy(struct xfrm_policy *policy)
{
	/* Only dead (unlinked) policies may reach the destructor. */
	BUG_ON(!policy->walk.dead);

	/* Both timers must already be idle here: a pending timer still
	 * owns a reference, so freeing now would be a use-after-free. */
	if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer))
		BUG();

	security_xfrm_policy_free(policy->security);
	kfree(policy);
}
EXPORT_SYMBOL(xfrm_policy_destroy);
318
/* Free every skb still parked on a policy hold queue.
 *
 * The original open-coded a dequeue/free loop; skb_queue_purge() is the
 * stock helper doing exactly that (dequeue under the queue lock,
 * kfree_skb each), so use it instead of hand-rolling.
 */
static void xfrm_queue_purge(struct sk_buff_head *list)
{
	skb_queue_purge(list);
}
326
/* Rule must be locked. Release descendant resources, announce
 * entry dead. The rule must be unlinked from lists to the moment.
 */

static void xfrm_policy_kill(struct xfrm_policy *policy)
{
	policy->walk.dead = 1;

	/* Invalidate bundles cached against this policy generation. */
	atomic_inc(&policy->genid);

	/* A pending timer owns a reference; del_timer() returning true
	 * means we cancelled it, so drop that reference ourselves. */
	if (del_timer(&policy->polq.hold_timer))
		xfrm_pol_put(policy);
	xfrm_queue_purge(&policy->polq.hold_queue);

	if (del_timer(&policy->timer))
		xfrm_pol_put(policy);

	/* Finally drop the caller's reference; may free the policy. */
	xfrm_pol_put(policy);
}
346
/* Upper bound on the policy hash table sizes, in buckets. */
static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024;

/* Bucket index in the byidx table for a policy index. */
static inline unsigned int idx_hash(struct net *net, u32 index)
{
	return __idx_hash(index, net->xfrm.policy_idx_hmask);
}
353
/* Pick the hash chain for a policy selector in direction @dir.
 *
 * __sel_hash() returns hmask + 1 as a sentinel for selectors it cannot
 * hash exactly (presumably wildcard prefixes/ports — confirm in
 * xfrm_hash.h); those policies live on the per-direction inexact list.
 */
static struct hlist_head *policy_hash_bysel(struct net *net,
					    const struct xfrm_selector *sel,
					    unsigned short family, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int hash = __sel_hash(sel, family, hmask);

	return (hash == hmask + 1 ?
		&net->xfrm.policy_inexact[dir] :
		net->xfrm.policy_bydst[dir].table + hash);
}
365
366 static struct hlist_head *policy_hash_direct(struct net *net,
367                                              const xfrm_address_t *daddr,
368                                              const xfrm_address_t *saddr,
369                                              unsigned short family, int dir)
370 {
371         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
372         unsigned int hash = __addr_hash(daddr, saddr, family, hmask);
373
374         return net->xfrm.policy_bydst[dir].table + hash;
375 }
376
/* Move every policy on chain @list into the resized table @ndsttable.
 *
 * Each pass moves the entries that rehash to the same new bucket as the
 * first entry (h0), appending after entry0 with hlist_add_after() so the
 * relative order within a bucket is preserved.  Entries rehashing to a
 * different bucket are skipped and picked up by a later "redo" pass,
 * until the source chain is empty.
 */
static void xfrm_dst_hash_transfer(struct hlist_head *list,
				   struct hlist_head *ndsttable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp, *entry0 = NULL;
	struct xfrm_policy *pol;
	unsigned int h0 = 0;

redo:
	hlist_for_each_entry_safe(pol, tmp, list, bydst) {
		unsigned int h;

		h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr,
				pol->family, nhashmask);
		if (!entry0) {
			/* First entry of this pass anchors the target bucket. */
			hlist_del(&pol->bydst);
			hlist_add_head(&pol->bydst, ndsttable+h);
			h0 = h;
		} else {
			if (h != h0)
				continue;	/* different bucket: next pass */
			hlist_del(&pol->bydst);
			hlist_add_after(entry0, &pol->bydst);
		}
		entry0 = &pol->bydst;
	}
	if (!hlist_empty(list)) {
		entry0 = NULL;
		goto redo;
	}
}
408
/* Rehash every policy on chain @list into the resized byidx table.
 *
 * Nodes are re-headed into the new table without an hlist_del(); the
 * stale links left behind in the old chain don't matter because the
 * caller discards and frees the old table afterwards.
 */
static void xfrm_idx_hash_transfer(struct hlist_head *list,
				   struct hlist_head *nidxtable,
				   unsigned int nhashmask)
{
	struct hlist_node *tmp;
	struct xfrm_policy *pol;

	hlist_for_each_entry_safe(pol, tmp, list, byidx) {
		unsigned int h;

		h = __idx_hash(pol->index, nhashmask);
		hlist_add_head(&pol->byidx, nidxtable+h);
	}
}
423
/* Grow a power-of-two-minus-one hash mask to the next size up:
 * 2^k - 1 becomes 2^(k+1) - 1 (i.e. the table doubles). */
static unsigned long xfrm_new_hash_mask(unsigned int old_hmask)
{
	unsigned int doubled = (old_hmask + 1) << 1;

	return doubled - 1;
}
428
/* Double the bydst hash table for direction @dir.
 *
 * The new table is allocated outside the lock (silently bailing on
 * allocation failure — resize is best-effort); entries are transferred
 * and the table pointers swapped under the policy write lock, then the
 * old table is freed.
 */
static void xfrm_bydst_resize(struct net *net, int dir)
{
	unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *odst = net->xfrm.policy_bydst[dir].table;
	struct hlist_head *ndst = xfrm_hash_alloc(nsize);
	int i;

	if (!ndst)
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_dst_hash_transfer(odst + i, ndst, nhashmask);

	net->xfrm.policy_bydst[dir].table = ndst;
	net->xfrm.policy_bydst[dir].hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head));
}
453
/* Double the byidx hash table; mirror of xfrm_bydst_resize().
 * Best-effort: silently returns if the new table cannot be allocated.
 * @total is unused here (sized by the hash mask, not the count).
 */
static void xfrm_byidx_resize(struct net *net, int total)
{
	unsigned int hmask = net->xfrm.policy_idx_hmask;
	unsigned int nhashmask = xfrm_new_hash_mask(hmask);
	unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head);
	struct hlist_head *oidx = net->xfrm.policy_byidx;
	struct hlist_head *nidx = xfrm_hash_alloc(nsize);
	int i;

	if (!nidx)
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	for (i = hmask; i >= 0; i--)
		xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask);

	net->xfrm.policy_byidx = nidx;
	net->xfrm.policy_idx_hmask = nhashmask;

	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head));
}
478
479 static inline int xfrm_bydst_should_resize(struct net *net, int dir, int *total)
480 {
481         unsigned int cnt = net->xfrm.policy_count[dir];
482         unsigned int hmask = net->xfrm.policy_bydst[dir].hmask;
483
484         if (total)
485                 *total += cnt;
486
487         if ((hmask + 1) < xfrm_policy_hashmax &&
488             cnt > hmask)
489                 return 1;
490
491         return 0;
492 }
493
494 static inline int xfrm_byidx_should_resize(struct net *net, int total)
495 {
496         unsigned int hmask = net->xfrm.policy_idx_hmask;
497
498         if ((hmask + 1) < xfrm_policy_hashmax &&
499             total > hmask)
500                 return 1;
501
502         return 0;
503 }
504
/* Snapshot SPD statistics into @si under the policy read lock.
 * The [dir + XFRM_POLICY_MAX] slots count a second class of policies
 * (presumably per-socket policies — confirm at the counting sites). */
void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si)
{
	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN];
	si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT];
	si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD];
	si->inscnt = net->xfrm.policy_count[XFRM_POLICY_IN+XFRM_POLICY_MAX];
	si->outscnt = net->xfrm.policy_count[XFRM_POLICY_OUT+XFRM_POLICY_MAX];
	si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX];
	si->spdhcnt = net->xfrm.policy_idx_hmask;
	si->spdhmcnt = xfrm_policy_hashmax;
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_spd_getinfo);
519
/* Serializes concurrent resize work items across namespaces. */
static DEFINE_MUTEX(hash_resize_mutex);

/* Deferred-work entry point: grow any per-direction bydst table that is
 * overloaded, and the byidx table if the total policy count warrants it.
 * Scheduled from xfrm_policy_insert() via net->xfrm.policy_hash_work. */
static void xfrm_hash_resize(struct work_struct *work)
{
	struct net *net = container_of(work, struct net, xfrm.policy_hash_work);
	int dir, total;

	mutex_lock(&hash_resize_mutex);

	total = 0;
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		if (xfrm_bydst_should_resize(net, dir, &total))
			xfrm_bydst_resize(net, dir);
	}
	if (xfrm_byidx_should_resize(net, total))
		xfrm_byidx_resize(net, total);

	mutex_unlock(&hash_resize_mutex);
}
538
/* Generate new index... KAME seems to generate them ordered by cost
 * of an absolute inpredictability of ordering of rules. This will not pass. */
/*
 * Allocate a unique policy index for @dir.  The low three bits of an
 * index encode the direction (idx | dir, generator stepped by 8).  If
 * @index is non-zero it is tried first (caller-requested index); when
 * it collides, a generated index is used instead.  Caller must hold the
 * policy write lock so the uniqueness scan is stable.
 */
static u32 xfrm_gen_index(struct net *net, int dir, u32 index)
{
	static u32 idx_generator;

	for (;;) {
		struct hlist_head *list;
		struct xfrm_policy *p;
		u32 idx;
		int found;

		if (!index) {
			idx = (idx_generator | dir);
			idx_generator += 8;
		} else {
			idx = index;
			index = 0;	/* only try the requested index once */
		}

		/* Index 0 is reserved as "no index". */
		if (idx == 0)
			idx = 8;
		list = net->xfrm.policy_byidx + idx_hash(net, idx);
		found = 0;
		hlist_for_each_entry(p, list, byidx) {
			if (p->index == idx) {
				found = 1;
				break;
			}
		}
		if (!found)
			return idx;
	}
}
573
574 static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2)
575 {
576         u32 *p1 = (u32 *) s1;
577         u32 *p2 = (u32 *) s2;
578         int len = sizeof(struct xfrm_selector) / sizeof(u32);
579         int i;
580
581         for (i = 0; i < len; i++) {
582                 if (p1[i] != p2[i])
583                         return 1;
584         }
585
586         return 0;
587 }
588
/* Transfer the hold queue of @old (about to be replaced) onto @new.
 *
 * Pending skbs are spliced off under the old queue's lock; cancelling a
 * pending hold timer drops the reference it held on @old.  If anything
 * was queued, it is spliced onto @new and @new's timer is fired
 * immediately (mod_timer() returning 0 means it was idle, so take a
 * reference for it).
 */
static void xfrm_policy_requeue(struct xfrm_policy *old,
				struct xfrm_policy *new)
{
	struct xfrm_policy_queue *pq = &old->polq;
	struct sk_buff_head list;

	__skb_queue_head_init(&list);

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice_init(&pq->hold_queue, &list);
	if (del_timer(&pq->hold_timer))
		xfrm_pol_put(old);
	spin_unlock_bh(&pq->hold_queue.lock);

	if (skb_queue_empty(&list))
		return;

	pq = &new->polq;

	spin_lock_bh(&pq->hold_queue.lock);
	skb_queue_splice(&list, &pq->hold_queue);
	pq->timeout = XFRM_QUEUE_TMO_MIN;
	if (!mod_timer(&pq->hold_timer, jiffies))
		xfrm_pol_hold(new);
	spin_unlock_bh(&pq->hold_queue.lock);
}
615
616 static bool xfrm_policy_mark_match(struct xfrm_policy *policy,
617                                    struct xfrm_policy *pol)
618 {
619         u32 mark = policy->mark.v & policy->mark.m;
620
621         if (policy->mark.v == pol->mark.v && policy->mark.m == pol->mark.m)
622                 return true;
623
624         if ((mark & pol->mark.m) == pol->mark.v &&
625             policy->priority == pol->priority)
626                 return true;
627
628         return false;
629 }
630
/* Insert @policy into the SPD for direction @dir.
 *
 * Scans the selector hash chain for an existing equivalent policy
 * (same type/selector/mark/security context): with @excl set that is
 * -EEXIST, otherwise the old policy (delpol) is replaced.  The new
 * policy is linked in priority order, caches are invalidated, pending
 * packets are requeued from the replaced policy, and the replaced
 * policy is killed after the lock is dropped.  Returns 0 on success.
 */
int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl)
{
	struct net *net = xp_net(policy);
	struct xfrm_policy *pol;
	struct xfrm_policy *delpol;
	struct hlist_head *chain;
	struct hlist_node *newpos;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, &policy->selector, policy->family, dir);
	delpol = NULL;
	newpos = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == policy->type &&
		    !selector_cmp(&pol->selector, &policy->selector) &&
		    xfrm_policy_mark_match(policy, pol) &&
		    xfrm_sec_ctx_match(pol->security, policy->security) &&
		    !WARN_ON(delpol)) {
			if (excl) {
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				return -EEXIST;
			}
			delpol = pol;
			/* Keep scanning for the priority-ordered slot. */
			if (policy->priority > pol->priority)
				continue;
		} else if (policy->priority >= pol->priority) {
			newpos = &pol->bydst;
			continue;
		}
		if (delpol)
			break;
	}
	/* newpos is the last entry we must sort after, if any. */
	if (newpos)
		hlist_add_after(newpos, &policy->bydst);
	else
		hlist_add_head(&policy->bydst, chain);
	xfrm_pol_hold(policy);
	net->xfrm.policy_count[dir]++;
	atomic_inc(&flow_cache_genid);

	/* After previous checking, family can either be AF_INET or AF_INET6 */
	if (policy->family == AF_INET)
		rt_genid_bump_ipv4(net);
	else
		rt_genid_bump_ipv6(net);

	if (delpol) {
		/* Hand pending hold-queue packets over to the new policy. */
		xfrm_policy_requeue(delpol, policy);
		__xfrm_policy_unlink(delpol, dir);
	}
	/* A replacement inherits the old index; otherwise allocate one. */
	policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index);
	hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index));
	policy->curlft.add_time = get_seconds();
	policy->curlft.use_time = 0;
	/* Arm the lifetime timer; take a ref if it was idle. */
	if (!mod_timer(&policy->timer, jiffies + HZ))
		xfrm_pol_hold(policy);
	list_add(&policy->walk.all, &net->xfrm.policy_all);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (delpol)
		xfrm_policy_kill(delpol);
	else if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);

	return 0;
}
EXPORT_SYMBOL(xfrm_policy_insert);
698
/* Find (and optionally delete) a policy by selector + security context.
 *
 * Returns a referenced policy or NULL; *err reports a security-delete
 * failure (in which case the policy is returned still linked).  With
 * @delete set, the policy is unlinked under the lock and killed after
 * the lock is dropped — the caller still owns the returned reference.
 */
struct xfrm_policy *xfrm_policy_bysel_ctx(struct net *net, u32 mark, u8 type,
					  int dir, struct xfrm_selector *sel,
					  struct xfrm_sec_ctx *ctx, int delete,
					  int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = policy_hash_bysel(net, sel, sel->family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		if (pol->type == type &&
		    (mark & pol->mark.m) == pol->mark.v &&
		    !selector_cmp(sel, &pol->selector) &&
		    xfrm_sec_ctx_match(ctx, pol->security)) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_bysel_ctx);
737
/* Find (and optionally delete) a policy by its index.
 *
 * The index encodes the direction in its low bits, so a mismatch with
 * @dir is rejected up front with -ENOENT.  Otherwise mirrors
 * xfrm_policy_bysel_ctx(): returns a referenced policy or NULL, and on
 * delete the kill happens after the policy lock is dropped.
 */
struct xfrm_policy *xfrm_policy_byid(struct net *net, u32 mark, u8 type,
				     int dir, u32 id, int delete, int *err)
{
	struct xfrm_policy *pol, *ret;
	struct hlist_head *chain;

	*err = -ENOENT;
	if (xfrm_policy_id2dir(id) != dir)
		return NULL;

	*err = 0;
	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	chain = net->xfrm.policy_byidx + idx_hash(net, id);
	ret = NULL;
	hlist_for_each_entry(pol, chain, byidx) {
		if (pol->type == type && pol->index == id &&
		    (mark & pol->mark.m) == pol->mark.v) {
			xfrm_pol_hold(pol);
			if (delete) {
				*err = security_xfrm_policy_delete(
								pol->security);
				if (*err) {
					write_unlock_bh(&net->xfrm.xfrm_policy_lock);
					return pol;
				}
				__xfrm_policy_unlink(pol, dir);
			}
			ret = pol;
			break;
		}
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (ret && delete)
		xfrm_policy_kill(ret);
	return ret;
}
EXPORT_SYMBOL(xfrm_policy_byid);
776
#ifdef CONFIG_SECURITY_NETWORK_XFRM
/* Pre-flight for xfrm_policy_flush(): ask the LSM to approve deletion of
 * every policy of @type (inexact lists and all hash buckets, every
 * direction) before any are actually removed.  On the first refusal an
 * audit record is emitted and the error returned, leaving the SPD
 * untouched.  Caller holds the policy write lock.
 */
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			err = security_xfrm_policy_delete(pol->security);
			if (err) {
				xfrm_audit_policy_delete(pol, 0,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				return err;
			}
		}
		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				err = security_xfrm_policy_delete(
								pol->security);
				if (err) {
					xfrm_audit_policy_delete(pol, 0,
							audit_info->loginuid,
							audit_info->sessionid,
							audit_info->secid);
					return err;
				}
			}
		}
	}
	return err;
}
#else
/* Without LSM support, flushing is always permitted. */
static inline int
xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	return 0;
}
#endif
827
/* Delete every policy of @type from the SPD.
 *
 * After the LSM pre-check, each policy is unlinked under the write
 * lock; the lock is then dropped around the audit record and
 * xfrm_policy_kill() (which may sleep/free), and re-taken before the
 * scan restarts from the chain head ("goto again") because the chain
 * may have changed while unlocked.  Returns -ESRCH if nothing matched.
 */
int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info)
{
	int dir, err = 0, cnt = 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);

	err = xfrm_policy_flush_secctx_check(net, type, audit_info);
	if (err)
		goto out;

	for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
		struct xfrm_policy *pol;
		int i;

	again1:
		hlist_for_each_entry(pol,
				     &net->xfrm.policy_inexact[dir], bydst) {
			if (pol->type != type)
				continue;
			__xfrm_policy_unlink(pol, dir);
			write_unlock_bh(&net->xfrm.xfrm_policy_lock);
			cnt++;

			xfrm_audit_policy_delete(pol, 1, audit_info->loginuid,
						 audit_info->sessionid,
						 audit_info->secid);

			xfrm_policy_kill(pol);

			write_lock_bh(&net->xfrm.xfrm_policy_lock);
			goto again1;
		}

		for (i = net->xfrm.policy_bydst[dir].hmask; i >= 0; i--) {
	again2:
			hlist_for_each_entry(pol,
					     net->xfrm.policy_bydst[dir].table + i,
					     bydst) {
				if (pol->type != type)
					continue;
				__xfrm_policy_unlink(pol, dir);
				write_unlock_bh(&net->xfrm.xfrm_policy_lock);
				cnt++;

				xfrm_audit_policy_delete(pol, 1,
							 audit_info->loginuid,
							 audit_info->sessionid,
							 audit_info->secid);
				xfrm_policy_kill(pol);

				write_lock_bh(&net->xfrm.xfrm_policy_lock);
				goto again2;
			}
		}

	}
	if (!cnt)
		err = -ESRCH;
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return err;
}
EXPORT_SYMBOL(xfrm_policy_flush);
891
/* Resumable iteration over all policies of @net.  @func is called for
 * every live policy matching walk->type; a non-zero return from @func
 * suspends the walk, leaving the walker entry parked in the list so a
 * later call continues where this one stopped.
 * Returns 0 when the walk completes, -ENOENT when it visited nothing,
 * -EINVAL for an invalid walk type, or the error returned by @func.
 */
int xfrm_policy_walk(struct net *net, struct xfrm_policy_walk *walk,
		     int (*func)(struct xfrm_policy *, int, int, void*),
		     void *data)
{
	struct xfrm_policy *pol;
	struct xfrm_policy_walk_entry *x;
	int error = 0;

	if (walk->type >= XFRM_POLICY_TYPE_MAX &&
	    walk->type != XFRM_POLICY_TYPE_ANY)
		return -EINVAL;

	/* A detached walker with a non-zero sequence already finished. */
	if (list_empty(&walk->walk.all) && walk->seq != 0)
		return 0;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	if (list_empty(&walk->walk.all))
		x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all);
	else
		x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all);
	list_for_each_entry_from(x, &net->xfrm.policy_all, all) {
		/* Skip dead entries; walker entries themselves are
		 * created dead (see xfrm_policy_walk_init). */
		if (x->dead)
			continue;
		pol = container_of(x, struct xfrm_policy, walk);
		if (walk->type != XFRM_POLICY_TYPE_ANY &&
		    walk->type != pol->type)
			continue;
		error = func(pol, xfrm_policy_id2dir(pol->index),
			     walk->seq, data);
		if (error) {
			/* Park the walker right after the last visited
			 * policy so the walk can be resumed. */
			list_move_tail(&walk->walk.all, &x->all);
			goto out;
		}
		walk->seq++;
	}
	if (walk->seq == 0) {
		error = -ENOENT;
		goto out;
	}
	list_del_init(&walk->walk.all);
out:
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return error;
}
EXPORT_SYMBOL(xfrm_policy_walk);
937
938 void xfrm_policy_walk_init(struct xfrm_policy_walk *walk, u8 type)
939 {
940         INIT_LIST_HEAD(&walk->walk.all);
941         walk->walk.dead = 1;
942         walk->type = type;
943         walk->seq = 0;
944 }
945 EXPORT_SYMBOL(xfrm_policy_walk_init);
946
/* Detach a (possibly suspended) walker from the policy list of @net.
 * Nothing to do when the walker was never linked or already finished.
 */
void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net)
{
	if (list_empty(&walk->walk.all))
		return;

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	list_del(&walk->walk.all);
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);
}
EXPORT_SYMBOL(xfrm_policy_walk_done);
957
/*
 * Check whether a single policy applies to this flow.
 *
 * Returns 0 if the policy matches, -ESRCH if it does not,
 * or another negative errno from the security lookup.
 */
963 static int xfrm_policy_match(const struct xfrm_policy *pol,
964                              const struct flowi *fl,
965                              u8 type, u16 family, int dir)
966 {
967         const struct xfrm_selector *sel = &pol->selector;
968         int ret = -ESRCH;
969         bool match;
970
971         if (pol->family != family ||
972             (fl->flowi_mark & pol->mark.m) != pol->mark.v ||
973             pol->type != type)
974                 return ret;
975
976         match = xfrm_selector_match(sel, fl, family);
977         if (match)
978                 ret = security_xfrm_policy_lookup(pol->security, fl->flowi_secid,
979                                                   dir);
980
981         return ret;
982 }
983
/* Find the best-matching policy of @type for flow @fl in direction @dir.
 * The exact-address hash chain is searched first; a wildcard (inexact)
 * policy can only override that result with a strictly better
 * (numerically lower) priority.  Returns a held policy, NULL when
 * nothing matches, or an ERR_PTR from the security hook.
 */
static struct xfrm_policy *xfrm_policy_lookup_bytype(struct net *net, u8 type,
						     const struct flowi *fl,
						     u16 family, u8 dir)
{
	int err;
	struct xfrm_policy *pol, *ret;
	const xfrm_address_t *daddr, *saddr;
	struct hlist_head *chain;
	u32 priority = ~0U;

	daddr = xfrm_flowi_daddr(fl, family);
	saddr = xfrm_flowi_saddr(fl, family);
	if (unlikely(!daddr || !saddr))
		return NULL;

	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	/* Pass 1: chain of policies with fully specified addresses. */
	chain = policy_hash_direct(net, daddr, saddr, family, dir);
	ret = NULL;
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	/* Pass 2: wildcard policies; one wins only if it has strictly
	 * lower priority value than the exact match found above. */
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		err = xfrm_policy_match(pol, fl, type, family, dir);
		if (err) {
			if (err == -ESRCH)
				continue;
			else {
				ret = ERR_PTR(err);
				goto fail;
			}
		} else if (pol->priority < priority) {
			ret = pol;
			break;
		}
	}
	if (ret)
		xfrm_pol_hold(ret);
fail:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}
1039
/* Policy lookup honouring policy types: with sub-policy support compiled
 * in, a SUB-type match takes precedence over the MAIN type. */
static struct xfrm_policy *
__xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_policy *pol;

	pol = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_SUB, fl, family, dir);
	if (pol != NULL)
		return pol;
#endif
	return xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, fl, family, dir);
}
1052
1053 static int flow_to_policy_dir(int dir)
1054 {
1055         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1056             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1057             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1058                 return dir;
1059
1060         switch (dir) {
1061         default:
1062         case FLOW_DIR_IN:
1063                 return XFRM_POLICY_IN;
1064         case FLOW_DIR_OUT:
1065                 return XFRM_POLICY_OUT;
1066         case FLOW_DIR_FWD:
1067                 return XFRM_POLICY_FWD;
1068         }
1069 }
1070
/* Flow cache resolver for policies: drops the reference of the object
 * being replaced, performs a fresh lookup and returns the new policy's
 * flow cache object. */
static struct flow_cache_object *
xfrm_policy_lookup(struct net *net, const struct flowi *fl, u16 family,
		   u8 dir, struct flow_cache_object *old_obj, void *ctx)
{
	struct xfrm_policy *pol;

	if (old_obj)
		xfrm_pol_put(container_of(old_obj, struct xfrm_policy, flo));

	pol = __xfrm_policy_lookup(net, fl, family, flow_to_policy_dir(dir));
	if (IS_ERR_OR_NULL(pol))
		return ERR_CAST(pol);

	/* Resolver returns two references:
	 * one for cache and one for caller of flow_cache_lookup() */
	xfrm_pol_hold(pol);

	return &pol->flo;
}
1090
1091 static inline int policy_to_flow_dir(int dir)
1092 {
1093         if (XFRM_POLICY_IN == FLOW_DIR_IN &&
1094             XFRM_POLICY_OUT == FLOW_DIR_OUT &&
1095             XFRM_POLICY_FWD == FLOW_DIR_FWD)
1096                 return dir;
1097         switch (dir) {
1098         default:
1099         case XFRM_POLICY_IN:
1100                 return FLOW_DIR_IN;
1101         case XFRM_POLICY_OUT:
1102                 return FLOW_DIR_OUT;
1103         case XFRM_POLICY_FWD:
1104                 return FLOW_DIR_FWD;
1105         }
1106 }
1107
/* Look up the per-socket policy of @sk for direction @dir.
 * Returns a held policy when it matches @fl and passes the mark and
 * security checks, NULL when it does not apply, or an ERR_PTR on a
 * security lookup failure.
 */
static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir,
						 const struct flowi *fl)
{
	struct xfrm_policy *pol;
	struct net *net = sock_net(sk);

	read_lock_bh(&net->xfrm.xfrm_policy_lock);
	if ((pol = sk->sk_policy[dir]) != NULL) {
		bool match = xfrm_selector_match(&pol->selector, fl,
						 sk->sk_family);
		int err = 0;

		if (match) {
			/* The socket mark must agree with the policy
			 * mark before the security module is consulted. */
			if ((sk->sk_mark & pol->mark.m) != pol->mark.v) {
				pol = NULL;
				goto out;
			}
			err = security_xfrm_policy_lookup(pol->security,
						      fl->flowi_secid,
						      policy_to_flow_dir(dir));
			if (!err)
				xfrm_pol_hold(pol);
			else if (err == -ESRCH)
				pol = NULL;
			else
				pol = ERR_PTR(err);
		} else
			pol = NULL;
	}
out:
	read_unlock_bh(&net->xfrm.xfrm_policy_lock);
	return pol;
}
1141
/* Insert @pol into the per-net policy list and both hash tables for
 * @dir, taking a reference on it.  A rehash is scheduled when the
 * by-destination table has become too dense.  Callers in this file hold
 * net->xfrm.xfrm_policy_lock.
 */
static void __xfrm_policy_link(struct xfrm_policy *pol, int dir)
{
	struct net *net = xp_net(pol);
	struct hlist_head *chain = policy_hash_bysel(net, &pol->selector,
						     pol->family, dir);

	list_add(&pol->walk.all, &net->xfrm.policy_all);
	hlist_add_head(&pol->bydst, chain);
	hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index));
	net->xfrm.policy_count[dir]++;
	xfrm_pol_hold(pol);

	if (xfrm_bydst_should_resize(net, dir, NULL))
		schedule_work(&net->xfrm.policy_hash_work);
}
1157
/* Remove @pol from the per-net list and hash tables for @dir.
 * Returns @pol so the caller can release the reference taken at link
 * time, or NULL when the policy was already unhashed.  Callers in this
 * file hold net->xfrm.xfrm_policy_lock.
 */
static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol,
						int dir)
{
	struct net *net = xp_net(pol);

	if (hlist_unhashed(&pol->bydst))
		return NULL;

	hlist_del_init(&pol->bydst);
	hlist_del(&pol->byidx);
	list_del(&pol->walk.all);
	net->xfrm.policy_count[dir]--;

	return pol;
}
1173
1174 int xfrm_policy_delete(struct xfrm_policy *pol, int dir)
1175 {
1176         struct net *net = xp_net(pol);
1177
1178         write_lock_bh(&net->xfrm.xfrm_policy_lock);
1179         pol = __xfrm_policy_unlink(pol, dir);
1180         write_unlock_bh(&net->xfrm.xfrm_policy_lock);
1181         if (pol) {
1182                 xfrm_policy_kill(pol);
1183                 return 0;
1184         }
1185         return -ENOENT;
1186 }
1187 EXPORT_SYMBOL(xfrm_policy_delete);
1188
/* Install @pol as the per-socket policy of @sk for direction @dir,
 * replacing (and killing) any previous one.  With sub-policy support
 * only MAIN-type policies may be attached to sockets.
 */
int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol)
{
	struct net *net = xp_net(pol);
	struct xfrm_policy *old_pol;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pol && pol->type != XFRM_POLICY_TYPE_MAIN)
		return -EINVAL;
#endif

	write_lock_bh(&net->xfrm.xfrm_policy_lock);
	old_pol = sk->sk_policy[dir];
	sk->sk_policy[dir] = pol;
	if (pol) {
		pol->curlft.add_time = get_seconds();
		/* Socket policies live past the normal directions in the
		 * index space and the tables. */
		pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0);
		__xfrm_policy_link(pol, XFRM_POLICY_MAX+dir);
	}
	if (old_pol) {
		/* Hand over from the old policy to its replacement
		 * (see xfrm_policy_requeue) before unlinking it. */
		if (pol)
			xfrm_policy_requeue(old_pol, pol);

		/* Unlinking succeeds always. This is the only function
		 * allowed to delete or replace socket policy.
		 */
		__xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir);
	}
	write_unlock_bh(&net->xfrm.xfrm_policy_lock);

	if (old_pol) {
		xfrm_policy_kill(old_pol);
	}
	return 0;
}
1223
/* Duplicate @old for use as a per-socket policy in direction @dir.
 * The copy is linked into the per-net tables (which takes its own
 * reference); the allocation reference is then dropped, so the link
 * reference is the one the returned pointer relies on.
 * Returns NULL on allocation or security-clone failure.
 */
static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir)
{
	struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC);
	struct net *net = xp_net(old);

	if (newp) {
		newp->selector = old->selector;
		if (security_xfrm_policy_clone(old->security,
					       &newp->security)) {
			kfree(newp);
			return NULL;  /* ENOMEM */
		}
		newp->lft = old->lft;
		newp->curlft = old->curlft;
		newp->mark = old->mark;
		newp->action = old->action;
		newp->flags = old->flags;
		newp->xfrm_nr = old->xfrm_nr;
		newp->index = old->index;
		newp->type = old->type;
		memcpy(newp->xfrm_vec, old->xfrm_vec,
		       newp->xfrm_nr*sizeof(struct xfrm_tmpl));
		write_lock_bh(&net->xfrm.xfrm_policy_lock);
		__xfrm_policy_link(newp, XFRM_POLICY_MAX+dir);
		write_unlock_bh(&net->xfrm.xfrm_policy_lock);
		xfrm_pol_put(newp);
	}
	return newp;
}
1253
1254 int __xfrm_sk_clone_policy(struct sock *sk)
1255 {
1256         struct xfrm_policy *p0 = sk->sk_policy[0],
1257                            *p1 = sk->sk_policy[1];
1258
1259         sk->sk_policy[0] = sk->sk_policy[1] = NULL;
1260         if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL)
1261                 return -ENOMEM;
1262         if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL)
1263                 return -ENOMEM;
1264         return 0;
1265 }
1266
1267 static int
1268 xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote,
1269                unsigned short family)
1270 {
1271         int err;
1272         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1273
1274         if (unlikely(afinfo == NULL))
1275                 return -EINVAL;
1276         err = afinfo->get_saddr(net, local, remote);
1277         xfrm_policy_put_afinfo(afinfo);
1278         return err;
1279 }
1280
1281 /* Resolve list of templates for the flow, given policy. */
1282
/* Resolve each template of @policy to an xfrm_state for flow @fl,
 * storing acquired states in @xfrm.  Tunnel/BEET templates switch the
 * effective addresses to the template endpoints, with a local source
 * address looked up when the template leaves it unspecified.
 * Returns the number of states acquired, or a negative errno after
 * releasing everything taken so far.  A missing state (-ESRCH) is
 * reported as -EAGAIN unless the template is optional.
 */
static int
xfrm_tmpl_resolve_one(struct xfrm_policy *policy, const struct flowi *fl,
		      struct xfrm_state **xfrm, unsigned short family)
{
	struct net *net = xp_net(policy);
	int nx;
	int i, error;
	xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family);
	xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family);
	xfrm_address_t tmp;

	for (nx = 0, i = 0; i < policy->xfrm_nr; i++) {
		struct xfrm_state *x;
		xfrm_address_t *remote = daddr;
		xfrm_address_t *local  = saddr;
		struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i];

		if (tmpl->mode == XFRM_MODE_TUNNEL ||
		    tmpl->mode == XFRM_MODE_BEET) {
			remote = &tmpl->id.daddr;
			local = &tmpl->saddr;
			if (xfrm_addr_any(local, tmpl->encap_family)) {
				error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family);
				if (error)
					goto fail;
				local = &tmp;
			}
		}

		x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family);

		if (x && x->km.state == XFRM_STATE_VALID) {
			xfrm[nx++] = x;
			/* The endpoints of this level become the flow
			 * addresses for the next template. */
			daddr = remote;
			saddr = local;
			continue;
		}
		if (x) {
			error = (x->km.state == XFRM_STATE_ERROR ?
				 -EINVAL : -EAGAIN);
			xfrm_state_put(x);
		} else if (error == -ESRCH) {
			error = -EAGAIN;
		}

		if (!tmpl->optional)
			goto fail;
	}
	return nx;

fail:
	/* Release every state acquired before the failure. */
	for (nx--; nx >= 0; nx--)
		xfrm_state_put(xfrm[nx]);
	return error;
}
1338
/* Resolve the templates of all @npols policies for @fl into @xfrm.
 * With more than one policy, states are gathered in a temporary array
 * and sorted for outbound processing.  Returns the total number of
 * states, or a negative errno after releasing everything acquired.
 */
static int
xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, const struct flowi *fl,
		  struct xfrm_state **xfrm, unsigned short family)
{
	struct xfrm_state *tp[XFRM_MAX_DEPTH];
	struct xfrm_state **tpp = (npols > 1) ? tp : xfrm;
	int cnx = 0;
	int error;
	int ret;
	int i;

	for (i = 0; i < npols; i++) {
		/* Guard against overflowing the state array. */
		if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) {
			error = -ENOBUFS;
			goto fail;
		}

		ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family);
		if (ret < 0) {
			error = ret;
			goto fail;
		} else
			cnx += ret;
	}

	/* found states are sorted for outbound processing */
	if (npols > 1)
		xfrm_state_sort(xfrm, tpp, cnx, family);

	return cnx;

 fail:
	for (cnx--; cnx >= 0; cnx--)
		xfrm_state_put(tpp[cnx]);
	return error;

}
1376
1377 /* Check that the bundle accepts the flow and its components are
1378  * still valid.
1379  */
1380
1381 static inline int xfrm_get_tos(const struct flowi *fl, int family)
1382 {
1383         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
1384         int tos;
1385
1386         if (!afinfo)
1387                 return -EINVAL;
1388
1389         tos = afinfo->get_tos(fl);
1390
1391         xfrm_policy_put_afinfo(afinfo);
1392
1393         return tos;
1394 }
1395
1396 static struct flow_cache_object *xfrm_bundle_flo_get(struct flow_cache_object *flo)
1397 {
1398         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1399         struct dst_entry *dst = &xdst->u.dst;
1400
1401         if (xdst->route == NULL) {
1402                 /* Dummy bundle - if it has xfrms we were not
1403                  * able to build bundle as template resolution failed.
1404                  * It means we need to try again resolving. */
1405                 if (xdst->num_xfrms > 0)
1406                         return NULL;
1407         } else if (dst->flags & DST_XFRM_QUEUE) {
1408                 return NULL;
1409         } else {
1410                 /* Real bundle */
1411                 if (stale_bundle(dst))
1412                         return NULL;
1413         }
1414
1415         dst_hold(dst);
1416         return flo;
1417 }
1418
1419 static int xfrm_bundle_flo_check(struct flow_cache_object *flo)
1420 {
1421         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1422         struct dst_entry *dst = &xdst->u.dst;
1423
1424         if (!xdst->route)
1425                 return 0;
1426         if (stale_bundle(dst))
1427                 return 0;
1428
1429         return 1;
1430 }
1431
1432 static void xfrm_bundle_flo_delete(struct flow_cache_object *flo)
1433 {
1434         struct xfrm_dst *xdst = container_of(flo, struct xfrm_dst, flo);
1435         struct dst_entry *dst = &xdst->u.dst;
1436
1437         dst_free(dst);
1438 }
1439
/* Flow cache callbacks for cached xfrm bundles. */
static const struct flow_cache_ops xfrm_bundle_fc_ops = {
	.get = xfrm_bundle_flo_get,
	.check = xfrm_bundle_flo_check,
	.delete = xfrm_bundle_flo_delete,
};
1445
/* Allocate an xfrm_dst for @family from the matching per-net dst_ops.
 * Everything past the embedded dst_entry is zeroed, the flow cache ops
 * are attached and the optional per-family init hook runs.
 * Returns an ERR_PTR on failure; an unsupported family is a bug.
 */
static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family)
{
	struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
	struct dst_ops *dst_ops;
	struct xfrm_dst *xdst;

	if (!afinfo)
		return ERR_PTR(-EINVAL);

	switch (family) {
	case AF_INET:
		dst_ops = &net->xfrm.xfrm4_dst_ops;
		break;
#if IS_ENABLED(CONFIG_IPV6)
	case AF_INET6:
		dst_ops = &net->xfrm.xfrm6_dst_ops;
		break;
#endif
	default:
		BUG();
	}
	xdst = dst_alloc(dst_ops, NULL, 0, DST_OBSOLETE_NONE, 0);

	if (likely(xdst)) {
		struct dst_entry *dst = &xdst->u.dst;

		/* dst_alloc() only initialises the dst_entry part; clear
		 * the xfrm_dst-specific tail ourselves. */
		memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst));
		xdst->flo.ops = &xfrm_bundle_fc_ops;
		if (afinfo->init_dst)
			afinfo->init_dst(net, xdst);
	} else
		xdst = ERR_PTR(-ENOBUFS);

	xfrm_policy_put_afinfo(afinfo);

	return xdst;
}
1483
1484 static inline int xfrm_init_path(struct xfrm_dst *path, struct dst_entry *dst,
1485                                  int nfheader_len)
1486 {
1487         struct xfrm_policy_afinfo *afinfo =
1488                 xfrm_policy_get_afinfo(dst->ops->family);
1489         int err;
1490
1491         if (!afinfo)
1492                 return -EINVAL;
1493
1494         err = afinfo->init_path(path, dst, nfheader_len);
1495
1496         xfrm_policy_put_afinfo(afinfo);
1497
1498         return err;
1499 }
1500
1501 static inline int xfrm_fill_dst(struct xfrm_dst *xdst, struct net_device *dev,
1502                                 const struct flowi *fl)
1503 {
1504         struct xfrm_policy_afinfo *afinfo =
1505                 xfrm_policy_get_afinfo(xdst->u.dst.ops->family);
1506         int err;
1507
1508         if (!afinfo)
1509                 return -EINVAL;
1510
1511         err = afinfo->fill_dst(xdst, dev, fl);
1512
1513         xfrm_policy_put_afinfo(afinfo);
1514
1515         return err;
1516 }
1517
1518
1519 /* Allocate chain of dst_entry's, attach known xfrm's, calculate
1520  * all the metrics... Shortly, bundle a bundle.
1521  */
1522
1523 static struct dst_entry *xfrm_bundle_create(struct xfrm_policy *policy,
1524                                             struct xfrm_state **xfrm, int nx,
1525                                             const struct flowi *fl,
1526                                             struct dst_entry *dst)
1527 {
1528         struct net *net = xp_net(policy);
1529         unsigned long now = jiffies;
1530         struct net_device *dev;
1531         struct xfrm_mode *inner_mode;
1532         struct dst_entry *dst_prev = NULL;
1533         struct dst_entry *dst0 = NULL;
1534         int i = 0;
1535         int err;
1536         int header_len = 0;
1537         int nfheader_len = 0;
1538         int trailer_len = 0;
1539         int tos;
1540         int family = policy->selector.family;
1541         xfrm_address_t saddr, daddr;
1542
1543         xfrm_flowi_addr_get(fl, &saddr, &daddr, family);
1544
1545         tos = xfrm_get_tos(fl, family);
1546         err = tos;
1547         if (tos < 0)
1548                 goto put_states;
1549
1550         dst_hold(dst);
1551
1552         for (; i < nx; i++) {
1553                 struct xfrm_dst *xdst = xfrm_alloc_dst(net, family);
1554                 struct dst_entry *dst1 = &xdst->u.dst;
1555
1556                 err = PTR_ERR(xdst);
1557                 if (IS_ERR(xdst)) {
1558                         dst_release(dst);
1559                         goto put_states;
1560                 }
1561
1562                 if (xfrm[i]->sel.family == AF_UNSPEC) {
1563                         inner_mode = xfrm_ip2inner_mode(xfrm[i],
1564                                                         xfrm_af2proto(family));
1565                         if (!inner_mode) {
1566                                 err = -EAFNOSUPPORT;
1567                                 dst_release(dst);
1568                                 goto put_states;
1569                         }
1570                 } else
1571                         inner_mode = xfrm[i]->inner_mode;
1572
1573                 if (!dst_prev)
1574                         dst0 = dst1;
1575                 else {
1576                         dst_prev->child = dst_clone(dst1);
1577                         dst1->flags |= DST_NOHASH;
1578                 }
1579
1580                 xdst->route = dst;
1581                 dst_copy_metrics(dst1, dst);
1582
1583                 if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) {
1584                         family = xfrm[i]->props.family;
1585                         dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr,
1586                                               family);
1587                         err = PTR_ERR(dst);
1588                         if (IS_ERR(dst))
1589                                 goto put_states;
1590                 } else
1591                         dst_hold(dst);
1592
1593                 dst1->xfrm = xfrm[i];
1594                 xdst->xfrm_genid = xfrm[i]->genid;
1595
1596                 dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
1597                 dst1->flags |= DST_HOST;
1598                 dst1->lastuse = now;
1599
1600                 dst1->input = dst_discard;
1601                 dst1->output = inner_mode->afinfo->output;
1602
1603                 dst1->next = dst_prev;
1604                 dst_prev = dst1;
1605
1606                 header_len += xfrm[i]->props.header_len;
1607                 if (xfrm[i]->type->flags & XFRM_TYPE_NON_FRAGMENT)
1608                         nfheader_len += xfrm[i]->props.header_len;
1609                 trailer_len += xfrm[i]->props.trailer_len;
1610         }
1611
1612         dst_prev->child = dst;
1613         dst0->path = dst;
1614
1615         err = -ENODEV;
1616         dev = dst->dev;
1617         if (!dev)
1618                 goto free_dst;
1619
1620         xfrm_init_path((struct xfrm_dst *)dst0, dst, nfheader_len);
1621         xfrm_init_pmtu(dst_prev);
1622
1623         for (dst_prev = dst0; dst_prev != dst; dst_prev = dst_prev->child) {
1624                 struct xfrm_dst *xdst = (struct xfrm_dst *)dst_prev;
1625
1626                 err = xfrm_fill_dst(xdst, dev, fl);
1627                 if (err)
1628                         goto free_dst;
1629
1630                 dst_prev->header_len = header_len;
1631                 dst_prev->trailer_len = trailer_len;
1632                 header_len -= xdst->u.dst.xfrm->props.header_len;
1633                 trailer_len -= xdst->u.dst.xfrm->props.trailer_len;
1634         }
1635
1636 out:
1637         return dst0;
1638
1639 put_states:
1640         for (; i < nx; i++)
1641                 xfrm_state_put(xfrm[i]);
1642 free_dst:
1643         if (dst0)
1644                 dst_free(dst0);
1645         dst0 = ERR_PTR(err);
1646         goto out;
1647 }
1648
1649 #ifdef CONFIG_XFRM_SUB_POLICY
1650 static int xfrm_dst_alloc_copy(void **target, const void *src, int size)
1651 {
1652         if (!*target) {
1653                 *target = kmalloc(size, GFP_ATOMIC);
1654                 if (!*target)
1655                         return -ENOMEM;
1656         }
1657
1658         memcpy(*target, src, size);
1659         return 0;
1660 }
1661 #endif
1662
/* Record @sel (the selector of the second, sub-type policy) in
 * xdst->partner.  A no-op returning 0 without CONFIG_XFRM_SUB_POLICY. */
static int xfrm_dst_update_parent(struct dst_entry *dst,
				  const struct xfrm_selector *sel)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->partner),
				   sel, sizeof(*sel));
#else
	return 0;
#endif
}
1674
/* Record the originating flow @fl in xdst->origin.  A no-op returning 0
 * without CONFIG_XFRM_SUB_POLICY. */
static int xfrm_dst_update_origin(struct dst_entry *dst,
				  const struct flowi *fl)
{
#ifdef CONFIG_XFRM_SUB_POLICY
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
	return xfrm_dst_alloc_copy((void **)&(xdst->origin), fl, sizeof(*fl));
#else
	return 0;
#endif
}
1685
/* Expand the lookup result in pols[0] into the full applicable set.
 * With CONFIG_XFRM_SUB_POLICY, a matching ALLOW policy of non-MAIN type
 * gets the MAIN-type policy looked up into pols[1].  On return
 * *num_pols and *num_xfrms describe the set; *num_xfrms is set to -1
 * when any policy's action is not XFRM_POLICY_ALLOW.
 * Returns 0 or a negative errno from a failed lookup.
 */
static int xfrm_expand_policies(const struct flowi *fl, u16 family,
				struct xfrm_policy **pols,
				int *num_pols, int *num_xfrms)
{
	int i;

	if (*num_pols == 0 || !pols[0]) {
		*num_pols = 0;
		*num_xfrms = 0;
		return 0;
	}
	if (IS_ERR(pols[0]))
		return PTR_ERR(pols[0]);

	*num_xfrms = pols[0]->xfrm_nr;

#ifdef CONFIG_XFRM_SUB_POLICY
	if (pols[0] && pols[0]->action == XFRM_POLICY_ALLOW &&
	    pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(xp_net(pols[0]),
						    XFRM_POLICY_TYPE_MAIN,
						    fl, family,
						    XFRM_POLICY_OUT);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				/* Drop the references taken so far. */
				xfrm_pols_put(pols, *num_pols);
				return PTR_ERR(pols[1]);
			}
			(*num_pols)++;
			(*num_xfrms) += pols[1]->xfrm_nr;
		}
	}
#endif
	for (i = 0; i < *num_pols; i++) {
		if (pols[i]->action != XFRM_POLICY_ALLOW) {
			*num_xfrms = -1;
			break;
		}
	}

	return 0;

}
1729
/* Resolve xfrm_states for the given policies and build an xfrm bundle
 * on top of @dst_orig.
 *
 * Returns the new bundle as an xfrm_dst, or an ERR_PTR on failure
 * (0 and -EAGAIN from template resolution are passed through without
 * bumping the policy-error counter).  On success the bundle records
 * the policy pointers; callers treat the policy references as moved
 * into the bundle (see the "policies were stolen" handling in
 * xfrm_bundle_lookup()).
 */
static struct xfrm_dst *
xfrm_resolve_and_create_bundle(struct xfrm_policy **pols, int num_pols,
			       const struct flowi *fl, u16 family,
			       struct dst_entry *dst_orig)
{
	struct net *net = xp_net(pols[0]);
	struct xfrm_state *xfrm[XFRM_MAX_DEPTH];
	struct dst_entry *dst;
	struct xfrm_dst *xdst;
	int err;

	/* Try to instantiate a bundle */
	err = xfrm_tmpl_resolve(pols, num_pols, fl, xfrm, family);
	if (err <= 0) {
		/* err == 0 and -EAGAIN are expected "not ready" cases,
		 * not policy errors. */
		if (err != 0 && err != -EAGAIN)
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
		return ERR_PTR(err);
	}

	dst = xfrm_bundle_create(pols[0], xfrm, err, fl, dst_orig);
	if (IS_ERR(dst)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLEGENERROR);
		return ERR_CAST(dst);
	}

	xdst = (struct xfrm_dst *)dst;
	xdst->num_xfrms = err;	/* err > 0 here: count of resolved states */
	/* Remember what the bundle was built from (selector or flow)
	 * for later re-validation. */
	if (num_pols > 1)
		err = xfrm_dst_update_parent(dst, &pols[1]->selector);
	else
		err = xfrm_dst_update_origin(dst, fl);
	if (unlikely(err)) {
		dst_free(dst);
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTBUNDLECHECKERROR);
		return ERR_PTR(err);
	}

	xdst->num_pols = num_pols;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);
	xdst->policy_genid = atomic_read(&pols[0]->genid);

	return xdst;
}
1773
1774 static void xfrm_policy_queue_process(unsigned long arg)
1775 {
1776         int err = 0;
1777         struct sk_buff *skb;
1778         struct sock *sk;
1779         struct dst_entry *dst;
1780         struct xfrm_policy *pol = (struct xfrm_policy *)arg;
1781         struct xfrm_policy_queue *pq = &pol->polq;
1782         struct flowi fl;
1783         struct sk_buff_head list;
1784
1785         spin_lock(&pq->hold_queue.lock);
1786         skb = skb_peek(&pq->hold_queue);
1787         if (!skb) {
1788                 spin_unlock(&pq->hold_queue.lock);
1789                 goto out;
1790         }
1791         dst = skb_dst(skb);
1792         sk = skb->sk;
1793         xfrm_decode_session(skb, &fl, dst->ops->family);
1794         spin_unlock(&pq->hold_queue.lock);
1795
1796         dst_hold(dst->path);
1797         dst = xfrm_lookup(xp_net(pol), dst->path, &fl,
1798                           sk, 0);
1799         if (IS_ERR(dst))
1800                 goto purge_queue;
1801
1802         if (dst->flags & DST_XFRM_QUEUE) {
1803                 dst_release(dst);
1804
1805                 if (pq->timeout >= XFRM_QUEUE_TMO_MAX)
1806                         goto purge_queue;
1807
1808                 pq->timeout = pq->timeout << 1;
1809                 if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout))
1810                         xfrm_pol_hold(pol);
1811         goto out;
1812         }
1813
1814         dst_release(dst);
1815
1816         __skb_queue_head_init(&list);
1817
1818         spin_lock(&pq->hold_queue.lock);
1819         pq->timeout = 0;
1820         skb_queue_splice_init(&pq->hold_queue, &list);
1821         spin_unlock(&pq->hold_queue.lock);
1822
1823         while (!skb_queue_empty(&list)) {
1824                 skb = __skb_dequeue(&list);
1825
1826                 xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family);
1827                 dst_hold(skb_dst(skb)->path);
1828                 dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path,
1829                                   &fl, skb->sk, 0);
1830                 if (IS_ERR(dst)) {
1831                         kfree_skb(skb);
1832                         continue;
1833                 }
1834
1835                 nf_reset(skb);
1836                 skb_dst_drop(skb);
1837                 skb_dst_set(skb, dst);
1838
1839                 err = dst_output(skb);
1840         }
1841
1842 out:
1843         xfrm_pol_put(pol);
1844         return;
1845
1846 purge_queue:
1847         pq->timeout = 0;
1848         xfrm_queue_purge(&pq->hold_queue);
1849         xfrm_pol_put(pol);
1850 }
1851
/* dst_entry output handler for dummy bundles (DST_XFRM_QUEUE).
 *
 * Instead of transmitting, queue the skb on the owning policy's hold
 * queue and (re-)arm the hold timer so xfrm_policy_queue_process() can
 * retry once the needed xfrm_states appear.  Uses spin_lock_bh since
 * this runs outside the timer softirq.  Returns 0 on queue/drop,
 * -EAGAIN when the queue is full.
 */
static int xdst_queue_output(struct sk_buff *skb)
{
	unsigned long sched_next;
	struct dst_entry *dst = skb_dst(skb);
	struct xfrm_dst *xdst = (struct xfrm_dst *) dst;
	struct xfrm_policy *pol = xdst->pols[0];
	struct xfrm_policy_queue *pq = &pol->polq;
	const struct sk_buff *fclone = skb + 1;

	/* Drop fast-cloned skbs whose clone is live — presumably they
	 * may still be owned by the protocol (e.g. for retransmit) and
	 * must not sit on the hold queue; TODO confirm rationale. */
	if (unlikely(skb->fclone == SKB_FCLONE_ORIG &&
		     fclone->fclone == SKB_FCLONE_CLONE)) {
		kfree_skb(skb);
		return 0;
	}

	/* Bound the queue (note '>' allows up to XFRM_MAX_QUEUE_LEN + 1
	 * entries — presumably intentional; verify against the limit's
	 * definition). */
	if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) {
		kfree_skb(skb);
		return -EAGAIN;
	}

	skb_dst_force(skb);

	spin_lock_bh(&pq->hold_queue.lock);

	if (!pq->timeout)
		pq->timeout = XFRM_QUEUE_TMO_MIN;

	sched_next = jiffies + pq->timeout;

	/* If the timer was pending, keep the earlier of its expiry and
	 * ours, and drop the reference that pending timer held. */
	if (del_timer(&pq->hold_timer)) {
		if (time_before(pq->hold_timer.expires, sched_next))
			sched_next = pq->hold_timer.expires;
		xfrm_pol_put(pol);
	}

	__skb_queue_tail(&pq->hold_queue, skb);
	/* mod_timer() returning 0 means the timer was inactive, so the
	 * newly armed timer needs its own policy reference. */
	if (!mod_timer(&pq->hold_timer, sched_next))
		xfrm_pol_hold(pol);

	spin_unlock_bh(&pq->hold_queue.lock);

	return 0;
}
1895
/* Build a "dummy" bundle whose output hook queues packets until the
 * required xfrm_states show up (see xdst_queue_output()).
 *
 * If the caller did not request queueing (XFRM_LOOKUP_QUEUE), larval
 * drop is enabled, or there are no templates to wait for, the bare
 * xdst is returned without queueing setup (xdst->route stays unset).
 * Returns an ERR_PTR on allocation or fill failure.
 */
static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net,
						 struct xfrm_flo *xflo,
						 const struct flowi *fl,
						 int num_xfrms,
						 u16 family)
{
	int err;
	struct net_device *dev;
	struct dst_entry *dst;
	struct dst_entry *dst1;
	struct xfrm_dst *xdst;

	xdst = xfrm_alloc_dst(net, family);
	if (IS_ERR(xdst))
		return xdst;

	if (!(xflo->flags & XFRM_LOOKUP_QUEUE) ||
	    net->xfrm.sysctl_larval_drop ||
	    num_xfrms <= 0)
		return xdst;

	dst = xflo->dst_orig;
	dst1 = &xdst->u.dst;
	/* One reference for xdst->route ... */
	dst_hold(dst);
	xdst->route = dst;

	dst_copy_metrics(dst1, dst);

	dst1->obsolete = DST_OBSOLETE_FORCE_CHK;
	/* DST_XFRM_QUEUE marks this as a queueing (dummy) bundle. */
	dst1->flags |= DST_HOST | DST_XFRM_QUEUE;
	dst1->lastuse = jiffies;

	dst1->input = dst_discard;
	dst1->output = xdst_queue_output;

	/* ... and another for child/path. */
	dst_hold(dst);
	dst1->child = dst;
	dst1->path = dst;

	xfrm_init_path((struct xfrm_dst *)dst1, dst, 0);

	err = -ENODEV;
	dev = dst->dev;
	if (!dev)
		goto free_dst;

	err = xfrm_fill_dst(xdst, dev, fl);
	if (err)
		goto free_dst;

out:
	return xdst;

free_dst:
	dst_release(dst1);
	xdst = ERR_PTR(err);
	goto out;
}
1954
/* Flow cache resolver for output bundles.
 *
 * @oldflo, if set, is the previously cached xfrm_dst whose policies
 * are reused when still alive; otherwise policies are looked up and
 * expanded afresh.  @ctx is the caller's struct xfrm_flo.  Returns the
 * flow_cache_object embedded in a (possibly dummy) xfrm_dst with one
 * reference taken for the caller, NULL when no policy applies, or an
 * ERR_PTR on failure.
 */
static struct flow_cache_object *
xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir,
		   struct flow_cache_object *oldflo, void *ctx)
{
	struct xfrm_flo *xflo = (struct xfrm_flo *)ctx;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct xfrm_dst *xdst, *new_xdst;
	int num_pols = 0, num_xfrms = 0, i, err, pol_dead;

	/* Check if the policies from old bundle are usable */
	xdst = NULL;
	if (oldflo) {
		xdst = container_of(oldflo, struct xfrm_dst, flo);
		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		pol_dead = 0;
		for (i = 0; i < num_pols; i++) {
			pols[i] = xdst->pols[i];
			pol_dead |= pols[i]->walk.dead;
		}
		if (pol_dead) {
			/* A policy died: discard the old bundle and
			 * fall through to a fresh lookup. */
			dst_free(&xdst->u.dst);
			xdst = NULL;
			num_pols = 0;
			num_xfrms = 0;
			oldflo = NULL;
		}
	}

	/* Resolve policies to use if we couldn't get them from
	 * previous cache entry */
	if (xdst == NULL) {
		num_pols = 1;
		pols[0] = __xfrm_policy_lookup(net, fl, family,
					       flow_to_policy_dir(dir));
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto inc_error;
		if (num_pols == 0)
			return NULL;
		if (num_xfrms <= 0)
			goto make_dummy_bundle;
	}

	new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family,
						  xflo->dst_orig);
	if (IS_ERR(new_xdst)) {
		err = PTR_ERR(new_xdst);
		if (err != -EAGAIN)
			goto error;
		/* States not ready yet: keep the old bundle if there is
		 * one, otherwise hand out a queueing dummy. */
		if (oldflo == NULL)
			goto make_dummy_bundle;
		dst_hold(&xdst->u.dst);
		return oldflo;
	} else if (new_xdst == NULL) {
		num_xfrms = 0;
		if (oldflo == NULL)
			goto make_dummy_bundle;
		xdst->num_xfrms = 0;
		dst_hold(&xdst->u.dst);
		return oldflo;
	}

	/* Kill the previous bundle */
	if (xdst) {
		/* The policies were stolen for newly generated bundle */
		xdst->num_pols = 0;
		dst_free(&xdst->u.dst);
	}

	/* Flow cache does not have reference, it dst_free()'s,
	 * but we do need to return one reference for original caller */
	dst_hold(&new_xdst->u.dst);
	return &new_xdst->flo;

make_dummy_bundle:
	/* We found policies, but there's no bundles to instantiate:
	 * either because the policy blocks, has no transformations or
	 * we could not build template (no xfrm_states).*/
	xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family);
	if (IS_ERR(xdst)) {
		xfrm_pols_put(pols, num_pols);
		return ERR_CAST(xdst);
	}
	xdst->num_pols = num_pols;
	xdst->num_xfrms = num_xfrms;
	memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols);

	dst_hold(&xdst->u.dst);
	return &xdst->flo;

inc_error:
	XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLERROR);
error:
	if (xdst != NULL)
		dst_free(&xdst->u.dst);
	else
		xfrm_pols_put(pols, num_pols);
	return ERR_PTR(err);
}
2056
2057 static struct dst_entry *make_blackhole(struct net *net, u16 family,
2058                                         struct dst_entry *dst_orig)
2059 {
2060         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2061         struct dst_entry *ret;
2062
2063         if (!afinfo) {
2064                 dst_release(dst_orig);
2065                 return ERR_PTR(-EINVAL);
2066         } else {
2067                 ret = afinfo->blackhole_route(net, dst_orig);
2068         }
2069         xfrm_policy_put_afinfo(afinfo);
2070
2071         return ret;
2072 }
2073
/* Main function: finds/creates a bundle for given flow.
 *
 * At the moment we eat a raw IP route. Mostly to speed up lookups
 * on interfaces with disabled IPsec.
 *
 * Reference semantics: on success the reference on @dst_orig is either
 * returned to the caller (flow passes untransformed) or consumed by
 * the returned bundle; on error an ERR_PTR is returned and the
 * @dst_orig reference is dropped.  -EREMOTE is returned when states
 * are unresolved and sysctl_larval_drop is set — xfrm_lookup_route()
 * converts that into a blackhole route.
 */
struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig,
			      const struct flowi *fl,
			      struct sock *sk, int flags)
{
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	struct flow_cache_object *flo;
	struct xfrm_dst *xdst;
	struct dst_entry *dst, *route;
	u16 family = dst_orig->ops->family;
	u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
	int i, err, num_pols, num_xfrms = 0, drop_pols = 0;

	dst = NULL;
	xdst = NULL;
	route = NULL;

	/* Per-socket policies take precedence over the flow cache. */
	if (sk && sk->sk_policy[XFRM_POLICY_OUT]) {
		num_pols = 1;
		pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl);
		err = xfrm_expand_policies(fl, family, pols,
					   &num_pols, &num_xfrms);
		if (err < 0)
			goto dropdst;

		if (num_pols) {
			if (num_xfrms <= 0) {
				/* No transforms (or blocked): handled
				 * at no_transform below. */
				drop_pols = num_pols;
				goto no_transform;
			}

			xdst = xfrm_resolve_and_create_bundle(
					pols, num_pols, fl,
					family, dst_orig);
			if (IS_ERR(xdst)) {
				xfrm_pols_put(pols, num_pols);
				err = PTR_ERR(xdst);
				goto dropdst;
			} else if (xdst == NULL) {
				num_xfrms = 0;
				drop_pols = num_pols;
				goto no_transform;
			}

			dst_hold(&xdst->u.dst);

			/* Socket bundles are not flow-cached; chain
			 * them for __xfrm_garbage_collect(). */
			spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
			xdst->u.dst.next = xfrm_policy_sk_bundles;
			xfrm_policy_sk_bundles = &xdst->u.dst;
			spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);

			route = xdst->route;
		}
	}

	if (xdst == NULL) {
		struct xfrm_flo xflo;

		xflo.dst_orig = dst_orig;
		xflo.flags = flags;

		/* To accelerate a bit...  */
		if ((dst_orig->flags & DST_NOXFRM) ||
		    !net->xfrm.policy_count[XFRM_POLICY_OUT])
			goto nopol;

		flo = flow_cache_lookup(net, fl, family, dir,
					xfrm_bundle_lookup, &xflo);
		if (flo == NULL)
			goto nopol;
		if (IS_ERR(flo)) {
			err = PTR_ERR(flo);
			goto dropdst;
		}
		xdst = container_of(flo, struct xfrm_dst, flo);

		num_pols = xdst->num_pols;
		num_xfrms = xdst->num_xfrms;
		memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols);
		route = xdst->route;
	}

	dst = &xdst->u.dst;
	if (route == NULL && num_xfrms > 0) {
		/* The only case when xfrm_bundle_lookup() returns a
		 * bundle with null route, is when the template could
		 * not be resolved. It means policies are there, but
		 * bundle could not be created, since we don't yet
		 * have the xfrm_state's. We need to wait for KM to
		 * negotiate new SA's or bail out with error.*/
		if (net->xfrm.sysctl_larval_drop) {
			dst_release(dst);
			xfrm_pols_put(pols, drop_pols);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);

			return ERR_PTR(-EREMOTE);
		}

		err = -EAGAIN;

		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES);
		goto error;
	}

no_transform:
	if (num_pols == 0)
		goto nopol;

	if ((flags & XFRM_LOOKUP_ICMP) &&
	    !(pols[0]->flags & XFRM_POLICY_ICMP)) {
		err = -ENOENT;
		goto error;
	}

	for (i = 0; i < num_pols; i++)
		pols[i]->curlft.use_time = get_seconds();

	if (num_xfrms < 0) {
		/* Prohibit the flow */
		XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTPOLBLOCK);
		err = -EPERM;
		goto error;
	} else if (num_xfrms > 0) {
		/* Flow transformed */
		dst_release(dst_orig);
	} else {
		/* Flow passes untransformed */
		dst_release(dst);
		dst = dst_orig;
	}
ok:
	xfrm_pols_put(pols, drop_pols);
	if (dst && dst->xfrm &&
	    dst->xfrm->props.mode == XFRM_MODE_TUNNEL)
		dst->flags |= DST_XFRM_TUNNEL;
	return dst;

nopol:
	if (!(flags & XFRM_LOOKUP_ICMP)) {
		dst = dst_orig;
		goto ok;
	}
	err = -ENOENT;
error:
	dst_release(dst);
dropdst:
	dst_release(dst_orig);
	xfrm_pols_put(pols, drop_pols);
	return ERR_PTR(err);
}
EXPORT_SYMBOL(xfrm_lookup);
2229
2230 /* Callers of xfrm_lookup_route() must ensure a call to dst_output().
2231  * Otherwise we may send out blackholed packets.
2232  */
2233 struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig,
2234                                     const struct flowi *fl,
2235                                     struct sock *sk, int flags)
2236 {
2237         struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk,
2238                                             flags | XFRM_LOOKUP_QUEUE);
2239
2240         if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE)
2241                 return make_blackhole(net, dst_orig->ops->family, dst_orig);
2242
2243         return dst;
2244 }
2245 EXPORT_SYMBOL(xfrm_lookup_route);
2246
2247 static inline int
2248 xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl)
2249 {
2250         struct xfrm_state *x;
2251
2252         if (!skb->sp || idx < 0 || idx >= skb->sp->len)
2253                 return 0;
2254         x = skb->sp->xvec[idx];
2255         if (!x->type->reject)
2256                 return 0;
2257         return x->type->reject(x, skb, fl);
2258 }
2259
2260 /* When skb is transformed back to its "native" form, we have to
2261  * check policy restrictions. At the moment we make this in maximally
2262  * stupid way. Shame on me. :-) Of course, connected sockets must
2263  * have policy cached at them.
2264  */
2265
2266 static inline int
2267 xfrm_state_ok(const struct xfrm_tmpl *tmpl, const struct xfrm_state *x,
2268               unsigned short family)
2269 {
2270         if (xfrm_state_kern(x))
2271                 return tmpl->optional && !xfrm_state_addr_cmp(tmpl, x, tmpl->encap_family);
2272         return  x->id.proto == tmpl->id.proto &&
2273                 (x->id.spi == tmpl->id.spi || !tmpl->id.spi) &&
2274                 (x->props.reqid == tmpl->reqid || !tmpl->reqid) &&
2275                 x->props.mode == tmpl->mode &&
2276                 (tmpl->allalgs || (tmpl->aalgos & (1<<x->props.aalgo)) ||
2277                  !(xfrm_id_proto_match(tmpl->id.proto, IPSEC_PROTO_ANY))) &&
2278                 !(x->props.mode != XFRM_MODE_TRANSPORT &&
2279                   xfrm_state_addr_cmp(tmpl, x, family));
2280 }
2281
/*
 * 0 or more than 0 is returned when validation succeeds (either bypass
 * because of optional transport mode, or the next index of the matched
 * secpath state with the template).
 * -1 is returned when no matching template is found.
 * Otherwise "-2 - errored_index" is returned.
 */
static inline int
xfrm_policy_ok(const struct xfrm_tmpl *tmpl, const struct sec_path *sp, int start,
	       unsigned short family)
{
	int idx = start;

	if (tmpl->optional) {
		/* Optional transport-mode templates match vacuously. */
		if (tmpl->mode == XFRM_MODE_TRANSPORT)
			return start;
	} else
		start = -1;	/* mandatory: "no match yet" sentinel */
	for (; idx < sp->len; idx++) {
		if (xfrm_state_ok(tmpl, sp->xvec[idx], family))
			return ++idx;
		if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) {
			/* A non-transport state that does not match the
			 * template is an error; encode its index. */
			if (start == -1)
				start = -2-idx;
			break;
		}
	}
	return start;
}
2311
2312 int __xfrm_decode_session(struct sk_buff *skb, struct flowi *fl,
2313                           unsigned int family, int reverse)
2314 {
2315         struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family);
2316         int err;
2317
2318         if (unlikely(afinfo == NULL))
2319                 return -EAFNOSUPPORT;
2320
2321         afinfo->decode_session(skb, fl, reverse);
2322         err = security_xfrm_decode_session(skb, &fl->flowi_secid);
2323         xfrm_policy_put_afinfo(afinfo);
2324         return err;
2325 }
2326 EXPORT_SYMBOL(__xfrm_decode_session);
2327
2328 static inline int secpath_has_nontransport(const struct sec_path *sp, int k, int *idxp)
2329 {
2330         for (; k < sp->len; k++) {
2331                 if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) {
2332                         *idxp = k;
2333                         return 1;
2334                 }
2335         }
2336
2337         return 0;
2338 }
2339
/* Validate an inbound skb's secpath against the applicable policies.
 *
 * @dir carries the policy direction in its low XFRM_POLICY_MASK bits;
 * the bits above select reverse flow decoding.  Returns 1 when the
 * packet is acceptable and 0 when it must be dropped (with the
 * matching LINUX_MIB_XFRMIN* counter bumped).
 */
int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb,
			unsigned short family)
{
	struct net *net = dev_net(skb->dev);
	struct xfrm_policy *pol;
	struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX];
	int npols = 0;
	int xfrm_nr;
	int pi;
	int reverse;
	struct flowi fl;
	u8 fl_dir;
	int xerr_idx = -1;

	/* Split the flag bits off the policy direction. */
	reverse = dir & ~XFRM_POLICY_MASK;
	dir &= XFRM_POLICY_MASK;
	fl_dir = policy_to_flow_dir(dir);

	if (__xfrm_decode_session(skb, &fl, family, reverse) < 0) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINHDRERROR);
		return 0;
	}

	nf_nat_decode_session(skb, &fl, family);

	/* First, check used SA against their selectors. */
	if (skb->sp) {
		int i;

		for (i = skb->sp->len-1; i >= 0; i--) {
			struct xfrm_state *x = skb->sp->xvec[i];
			if (!xfrm_selector_match(&x->sel, &fl, family)) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH);
				return 0;
			}
		}
	}

	/* Per-socket policy first, then the flow cache. */
	pol = NULL;
	if (sk && sk->sk_policy[dir]) {
		pol = xfrm_sk_policy_lookup(sk, dir, &fl);
		if (IS_ERR(pol)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
			return 0;
		}
	}

	if (!pol) {
		struct flow_cache_object *flo;

		flo = flow_cache_lookup(net, &fl, family, fl_dir,
					xfrm_policy_lookup, NULL);
		if (IS_ERR_OR_NULL(flo))
			pol = ERR_CAST(flo);
		else
			pol = container_of(flo, struct xfrm_policy, flo);
	}

	if (IS_ERR(pol)) {
		XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
		return 0;
	}

	if (!pol) {
		/* No policy: only acceptable if the secpath carries
		 * nothing beyond transport-mode states. */
		if (skb->sp && secpath_has_nontransport(skb->sp, 0, &xerr_idx)) {
			xfrm_secpath_reject(xerr_idx, skb, &fl);
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINNOPOLS);
			return 0;
		}
		return 1;
	}

	pol->curlft.use_time = get_seconds();

	pols[0] = pol;
	npols++;
#ifdef CONFIG_XFRM_SUB_POLICY
	/* A sub policy is complemented by the main one for this flow. */
	if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) {
		pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN,
						    &fl, family,
						    XFRM_POLICY_IN);
		if (pols[1]) {
			if (IS_ERR(pols[1])) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR);
				return 0;
			}
			pols[1]->curlft.use_time = get_seconds();
			npols++;
		}
	}
#endif

	if (pol->action == XFRM_POLICY_ALLOW) {
		struct sec_path *sp;
		static struct sec_path dummy;	/* empty secpath stand-in */
		struct xfrm_tmpl *tp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl *stp[XFRM_MAX_DEPTH];
		struct xfrm_tmpl **tpp = tp;
		int ti = 0;
		int i, k;

		if ((sp = skb->sp) == NULL)
			sp = &dummy;

		/* Collect the templates of all applicable policies. */
		for (pi = 0; pi < npols; pi++) {
			if (pols[pi] != pol &&
			    pols[pi]->action != XFRM_POLICY_ALLOW) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);
				goto reject;
			}
			if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) {
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINBUFFERERROR);
				goto reject_error;
			}
			for (i = 0; i < pols[pi]->xfrm_nr; i++)
				tpp[ti++] = &pols[pi]->xfrm_vec[i];
		}
		xfrm_nr = ti;
		if (npols > 1) {
			xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net);
			tpp = stp;
		}

		/* For each tunnel xfrm, find the first matching tmpl.
		 * For each tmpl before that, find corresponding xfrm.
		 * Order is _important_. Later we will implement
		 * some barriers, but at the moment barriers
		 * are implied between each two transformations.
		 */
		for (i = xfrm_nr-1, k = 0; i >= 0; i--) {
			k = xfrm_policy_ok(tpp[i], sp, k, family);
			if (k < 0) {
				if (k < -1)
					/* "-2 - errored_index" returned */
					xerr_idx = -(2+k);
				XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
				goto reject;
			}
		}

		if (secpath_has_nontransport(sp, k, &xerr_idx)) {
			XFRM_INC_STATS(net, LINUX_MIB_XFRMINTMPLMISMATCH);
			goto reject;
		}

		xfrm_pols_put(pols, npols);
		return 1;
	}
	XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLBLOCK);

reject:
	xfrm_secpath_reject(xerr_idx, skb, &fl);
reject_error:
	xfrm_pols_put(pols, npols);
	return 0;
}
EXPORT_SYMBOL(__xfrm_policy_check);
2497
2498 int __xfrm_route_forward(struct sk_buff *skb, unsigned short family)
2499 {
2500         struct net *net = dev_net(skb->dev);
2501         struct flowi fl;
2502         struct dst_entry *dst;
2503         int res = 1;
2504
2505         if (xfrm_decode_session(skb, &fl, family) < 0) {
2506                 XFRM_INC_STATS(net, LINUX_MIB_XFRMFWDHDRERROR);
2507                 return 0;
2508         }
2509
2510         skb_dst_force(skb);
2511
2512         dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE);
2513         if (IS_ERR(dst)) {
2514                 res = 0;
2515                 dst = NULL;
2516         }
2517         skb_dst_set(skb, dst);
2518         return res;
2519 }
2520 EXPORT_SYMBOL(__xfrm_route_forward);
2521
2522 /* Optimize later using cookies and generation ids. */
2523
2524 static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie)
2525 {
2526         /* Code (such as __xfrm4_bundle_create()) sets dst->obsolete
2527          * to DST_OBSOLETE_FORCE_CHK to force all XFRM destinations to
2528          * get validated by dst_ops->check on every use.  We do this
2529          * because when a normal route referenced by an XFRM dst is
2530          * obsoleted we do not go looking around for all parent
2531          * referencing XFRM dsts so that we can invalidate them.  It
2532          * is just too much work.  Instead we make the checks here on
2533          * every use.  For example:
2534          *
2535          *      XFRM dst A --> IPv4 dst X
2536          *
2537          * X is the "xdst->route" of A (X is also the "dst->path" of A
2538          * in this example).  If X is marked obsolete, "A" will not
2539          * notice.  That's what we are validating here via the
2540          * stale_bundle() check.
2541          *
2542          * When a policy's bundle is pruned, we dst_free() the XFRM
2543          * dst which causes it's ->obsolete field to be set to
2544          * DST_OBSOLETE_DEAD.  If an XFRM dst has been pruned like
2545          * this, we want to force a new route lookup.
2546          */
2547         if (dst->obsolete < 0 && !stale_bundle(dst))
2548                 return dst;
2549
2550         return NULL;
2551 }
2552
/* A bundle is stale exactly when xfrm_bundle_ok() rejects it. */
static int stale_bundle(struct dst_entry *dst)
{
	struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

	return xfrm_bundle_ok(xdst) ? 0 : 1;
}
2557
2558 void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev)
2559 {
2560         while ((dst = dst->child) && dst->xfrm && dst->dev == dev) {
2561                 dst->dev = dev_net(dev)->loopback_dev;
2562                 dev_hold(dst->dev);
2563                 dev_put(dev);
2564         }
2565 }
2566 EXPORT_SYMBOL(xfrm_dst_ifdown);
2567
/* dst_ops->link_failure hook.  Intentionally empty: such a dst is
 * impossible here, it must have been popped off the skb before the
 * packet could reach the point of failure.
 */
static void xfrm_link_failure(struct sk_buff *skb)
{
}
2572
2573 static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst)
2574 {
2575         if (dst) {
2576                 if (dst->obsolete) {
2577                         dst_release(dst);
2578                         dst = NULL;
2579                 }
2580         }
2581         return dst;
2582 }
2583
2584 static void __xfrm_garbage_collect(struct net *net)
2585 {
2586         struct dst_entry *head, *next;
2587
2588         spin_lock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
2589         head = xfrm_policy_sk_bundles;
2590         xfrm_policy_sk_bundles = NULL;
2591         spin_unlock_bh(&net->xfrm.xfrm_policy_sk_bundle_lock);
2592
2593         while (head) {
2594                 next = head->next;
2595                 dst_free(head);
2596                 head = next;
2597         }
2598 }
2599
/* Synchronous GC entry point: flush the flow cache, then free any
 * per-socket bundles that were queued for release.
 */
void xfrm_garbage_collect(struct net *net)
{
	flow_cache_flush();
	__xfrm_garbage_collect(net);
}
EXPORT_SYMBOL(xfrm_garbage_collect);
2606
/* Deferred-context variant used as the afinfo->garbage_collect callback:
 * schedules the flow cache flush instead of performing it synchronously.
 */
static void xfrm_garbage_collect_deferred(struct net *net)
{
	flow_cache_flush_deferred();
	__xfrm_garbage_collect(net);
}
2612
/* Walk a freshly built bundle from the outermost dst down the ->next chain,
 * seeding each level's cached child/route MTUs and setting its RTAX_MTU
 * metric to the child MTU minus the transform overhead, clamped to the
 * route MTU.
 */
static void xfrm_init_pmtu(struct dst_entry *dst)
{
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;
		u32 pmtu, route_mtu_cached;

		pmtu = dst_mtu(dst->child);
		xdst->child_mtu_cached = pmtu;

		/* Subtract this level's ESP/AH/IPcomp overhead. */
		pmtu = xfrm_state_mtu(dst->xfrm, pmtu);

		route_mtu_cached = dst_mtu(xdst->route);
		xdst->route_mtu_cached = route_mtu_cached;

		if (pmtu > route_mtu_cached)
			pmtu = route_mtu_cached;

		dst_metric_set(dst, RTAX_MTU, pmtu);
	} while ((dst = dst->next));
}
2633
2634 /* Check that the bundle accepts the flow and its components are
2635  * still valid.
2636  */
2637
static int xfrm_bundle_ok(struct xfrm_dst *first)
{
	struct dst_entry *dst = &first->u.dst;
	struct xfrm_dst *last;
	u32 mtu;

	/* Dead if the underlying path route fails its own dst_check()
	 * or the output device has gone down.
	 */
	if (!dst_check(dst->path, ((struct xfrm_dst *)dst)->path_cookie) ||
	    (dst->dev && !netif_running(dst->dev)))
		return 0;

	/* Packet-queueing placeholder bundles carry no states to verify. */
	if (dst->flags & DST_XFRM_QUEUE)
		return 1;

	last = NULL;

	/* Walk every XFRM level: each state must still be valid and all
	 * genids must match, else the bundle is stale.  Refresh cached
	 * child/route MTUs on the way down; 'last' remembers the deepest
	 * level whose cached MTU changed.
	 */
	do {
		struct xfrm_dst *xdst = (struct xfrm_dst *)dst;

		if (dst->xfrm->km.state != XFRM_STATE_VALID)
			return 0;
		if (xdst->xfrm_genid != dst->xfrm->genid)
			return 0;
		if (xdst->num_pols > 0 &&
		    xdst->policy_genid != atomic_read(&xdst->pols[0]->genid))
			return 0;

		mtu = dst_mtu(dst->child);
		if (xdst->child_mtu_cached != mtu) {
			last = xdst;
			xdst->child_mtu_cached = mtu;
		}

		if (!dst_check(xdst->route, xdst->route_cookie))
			return 0;
		mtu = dst_mtu(xdst->route);
		if (xdst->route_mtu_cached != mtu) {
			last = xdst;
			xdst->route_mtu_cached = mtu;
		}

		dst = dst->child;
	} while (dst->xfrm);

	if (likely(!last))
		return 1;

	/* A cached MTU changed: propagate the new PMTU from the deepest
	 * changed level back up towards 'first', applying each state's
	 * overhead and clamping to the route MTU at every level.
	 */
	mtu = last->child_mtu_cached;
	for (;;) {
		dst = &last->u.dst;

		mtu = xfrm_state_mtu(dst->xfrm, mtu);
		if (mtu > last->route_mtu_cached)
			mtu = last->route_mtu_cached;
		dst_metric_set(dst, RTAX_MTU, mtu);

		if (last == first)
			break;

		/* ->next links child back towards the bundle head here. */
		last = (struct xfrm_dst *)last->u.dst.next;
		last->child_mtu_cached = mtu;
	}

	return 1;
}
2702
/* dst_ops->default_advmss hook: delegate to the underlying path route. */
static unsigned int xfrm_default_advmss(const struct dst_entry *dst)
{
	return dst_metric_advmss(dst->path);
}
2707
2708 static unsigned int xfrm_mtu(const struct dst_entry *dst)
2709 {
2710         unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
2711
2712         return mtu ? : dst_mtu(dst->path);
2713 }
2714
/* dst_ops->neigh_lookup hook: resolve the neighbour through the path
 * route's own ops, but pass the XFRM dst itself as context.
 */
static struct neighbour *xfrm_neigh_lookup(const struct dst_entry *dst,
					   struct sk_buff *skb,
					   const void *daddr)
{
	return dst->path->ops->neigh_lookup(dst, skb, daddr);
}
2721
/* Register an address-family backend (IPv4/IPv6) with the xfrm policy
 * layer: fill in any dst_ops callbacks the backend left NULL with the
 * generic xfrm implementations, publish the afinfo for RCU readers, and
 * refresh every namespace's private copy of the dst_ops template.
 * Returns 0 on success, -EINVAL/-EAFNOSUPPORT/-ENOBUFS on error.
 */
int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo)
{
	struct net *net;
	int err = 0;
	if (unlikely(afinfo == NULL))
		return -EINVAL;
	if (unlikely(afinfo->family >= NPROTO))
		return -EAFNOSUPPORT;
	spin_lock(&xfrm_policy_afinfo_lock);
	if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL))
		err = -ENOBUFS;	/* already registered for this family */
	else {
		struct dst_ops *dst_ops = afinfo->dst_ops;
		/* Only install defaults where the caller provided nothing. */
		if (likely(dst_ops->kmem_cachep == NULL))
			dst_ops->kmem_cachep = xfrm_dst_cache;
		if (likely(dst_ops->check == NULL))
			dst_ops->check = xfrm_dst_check;
		if (likely(dst_ops->default_advmss == NULL))
			dst_ops->default_advmss = xfrm_default_advmss;
		if (likely(dst_ops->mtu == NULL))
			dst_ops->mtu = xfrm_mtu;
		if (likely(dst_ops->negative_advice == NULL))
			dst_ops->negative_advice = xfrm_negative_advice;
		if (likely(dst_ops->link_failure == NULL))
			dst_ops->link_failure = xfrm_link_failure;
		if (likely(dst_ops->neigh_lookup == NULL))
			dst_ops->neigh_lookup = xfrm_neigh_lookup;
		if (likely(afinfo->garbage_collect == NULL))
			afinfo->garbage_collect = xfrm_garbage_collect_deferred;
		/* Publish only after the ops are fully set up. */
		rcu_assign_pointer(xfrm_policy_afinfo[afinfo->family], afinfo);
	}
	spin_unlock(&xfrm_policy_afinfo_lock);

	/* Copy the (now complete) dst_ops template into each namespace.
	 * rtnl_lock() protects the for_each_net() walk.
	 */
	rtnl_lock();
	for_each_net(net) {
		struct dst_ops *xfrm_dst_ops;

		switch (afinfo->family) {
		case AF_INET:
			xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops;
			break;
#if IS_ENABLED(CONFIG_IPV6)
		case AF_INET6:
			xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops;
			break;
#endif
		default:
			BUG();
		}
		*xfrm_dst_ops = *afinfo->dst_ops;
	}
	rtnl_unlock();

	return err;
}
EXPORT_SYMBOL(xfrm_policy_register_afinfo);
2778
2779 int xfrm_policy_unregister_afinfo(struct xfrm_policy_afinfo *afinfo)
2780 {
2781         int err = 0;
2782         if (unlikely(afinfo == NULL))
2783                 return -EINVAL;
2784         if (unlikely(afinfo->family >= NPROTO))
2785                 return -EAFNOSUPPORT;
2786         spin_lock(&xfrm_policy_afinfo_lock);
2787         if (likely(xfrm_policy_afinfo[afinfo->family] != NULL)) {
2788                 if (unlikely(xfrm_policy_afinfo[afinfo->family] != afinfo))
2789                         err = -EINVAL;
2790                 else
2791                         RCU_INIT_POINTER(xfrm_policy_afinfo[afinfo->family],
2792                                          NULL);
2793         }
2794         spin_unlock(&xfrm_policy_afinfo_lock);
2795         if (!err) {
2796                 struct dst_ops *dst_ops = afinfo->dst_ops;
2797
2798                 synchronize_rcu();
2799
2800                 dst_ops->kmem_cachep = NULL;
2801                 dst_ops->check = NULL;
2802                 dst_ops->negative_advice = NULL;
2803                 dst_ops->link_failure = NULL;
2804                 afinfo->garbage_collect = NULL;
2805         }
2806         return err;
2807 }
2808 EXPORT_SYMBOL(xfrm_policy_unregister_afinfo);
2809
/* Seed a new namespace's per-family dst_ops from whichever afinfo
 * backends are currently registered (under RCU, since registration can
 * race with namespace creation).
 */
static void __net_init xfrm_dst_ops_init(struct net *net)
{
	struct xfrm_policy_afinfo *afinfo;

	rcu_read_lock();
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]);
	if (afinfo)
		net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops;
#if IS_ENABLED(CONFIG_IPV6)
	afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]);
	if (afinfo)
		net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops;
#endif
	rcu_read_unlock();
}
2825
2826 static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr)
2827 {
2828         struct net_device *dev = netdev_notifier_info_to_dev(ptr);
2829
2830         switch (event) {
2831         case NETDEV_DOWN:
2832                 xfrm_garbage_collect(dev_net(dev));
2833         }
2834         return NOTIFY_DONE;
2835 }
2836
/* Registered in xfrm_policy_init() (init_net only) to catch NETDEV_DOWN. */
static struct notifier_block xfrm_dev_notifier = {
	.notifier_call	= xfrm_dev_event,
};
2840
2841 #ifdef CONFIG_XFRM_STATISTICS
/* Allocate the per-cpu xfrm SNMP counters and the /proc interface for a
 * namespace.  Returns 0 on success or a negative errno; on proc failure
 * the counters are freed again.
 */
static int __net_init xfrm_statistics_init(struct net *net)
{
	int rv;

	if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics,
			  sizeof(struct linux_xfrm_mib),
			  __alignof__(struct linux_xfrm_mib)) < 0)
		return -ENOMEM;
	rv = xfrm_proc_init(net);
	if (rv < 0)
		snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
	return rv;
}
2855
/* Tear down the /proc interface and free the per-cpu SNMP counters. */
static void xfrm_statistics_fini(struct net *net)
{
	xfrm_proc_fini(net);
	snmp_mib_free((void __percpu **)net->mib.xfrm_statistics);
}
2861 #else
/* CONFIG_XFRM_STATISTICS=n stub: nothing to set up. */
static int __net_init xfrm_statistics_init(struct net *net)
{
	return 0;
}
2866
/* CONFIG_XFRM_STATISTICS=n stub: nothing to tear down. */
static void xfrm_statistics_fini(struct net *net)
{
}
2870 #endif
2871
/* Per-namespace policy-database setup: allocate the by-index hash table
 * and one by-destination hash table per direction, initialise the inexact
 * lists and the resize work, and (for init_net only) create the xfrm dst
 * slab cache and register the netdevice notifier.
 * Returns 0 or -ENOMEM (all partially allocated tables are unwound).
 */
static int __net_init xfrm_policy_init(struct net *net)
{
	unsigned int hmask, sz;
	int dir;

	if (net_eq(net, &init_net))
		xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache",
					   sizeof(struct xfrm_dst),
					   0, SLAB_HWCACHE_ALIGN|SLAB_PANIC,
					   NULL);

	/* All tables start at 8 buckets; the resize work grows them. */
	hmask = 8 - 1;
	sz = (hmask+1) * sizeof(struct hlist_head);

	net->xfrm.policy_byidx = xfrm_hash_alloc(sz);
	if (!net->xfrm.policy_byidx)
		goto out_byidx;
	net->xfrm.policy_idx_hmask = hmask;

	/* One bydst table per direction, for both plain and socket
	 * policies (hence XFRM_POLICY_MAX * 2).
	 */
	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		net->xfrm.policy_count[dir] = 0;
		INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]);

		htab = &net->xfrm.policy_bydst[dir];
		htab->table = xfrm_hash_alloc(sz);
		if (!htab->table)
			goto out_bydst;
		htab->hmask = hmask;
	}

	INIT_LIST_HEAD(&net->xfrm.policy_all);
	INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize);
	if (net_eq(net, &init_net))
		register_netdevice_notifier(&xfrm_dev_notifier);
	return 0;

out_bydst:
	/* Free only the directions that were successfully allocated. */
	for (dir--; dir >= 0; dir--) {
		struct xfrm_policy_hash *htab;

		htab = &net->xfrm.policy_bydst[dir];
		xfrm_hash_free(htab->table, sz);
	}
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
out_byidx:
	return -ENOMEM;
}
2921
/* Per-namespace policy-database teardown: flush all remaining policies
 * (with an "invalid" audit identity, since there is no calling task),
 * then free the hash tables after verifying they are empty.
 */
static void xfrm_policy_fini(struct net *net)
{
	struct xfrm_audit audit_info;
	unsigned int sz;
	int dir;

	/* Make sure no hash-resize work is still running on our tables. */
	flush_work(&net->xfrm.policy_hash_work);
#ifdef CONFIG_XFRM_SUB_POLICY
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = (unsigned int)-1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info);
#endif
	audit_info.loginuid = INVALID_UID;
	audit_info.sessionid = (unsigned int)-1;
	audit_info.secid = 0;
	xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info);

	WARN_ON(!list_empty(&net->xfrm.policy_all));

	for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) {
		struct xfrm_policy_hash *htab;

		WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir]));

		htab = &net->xfrm.policy_bydst[dir];
		sz = (htab->hmask + 1) * sizeof(struct hlist_head);
		WARN_ON(!hlist_empty(htab->table));
		xfrm_hash_free(htab->table, sz);
	}

	sz = (net->xfrm.policy_idx_hmask + 1) * sizeof(struct hlist_head);
	WARN_ON(!hlist_empty(net->xfrm.policy_byidx));
	xfrm_hash_free(net->xfrm.policy_byidx, sz);
}
2957
2958 static int __net_init xfrm_net_init(struct net *net)
2959 {
2960         int rv;
2961
2962         rv = xfrm_statistics_init(net);
2963         if (rv < 0)
2964                 goto out_statistics;
2965         rv = xfrm_state_init(net);
2966         if (rv < 0)
2967                 goto out_state;
2968         rv = xfrm_policy_init(net);
2969         if (rv < 0)
2970                 goto out_policy;
2971         xfrm_dst_ops_init(net);
2972         rv = xfrm_sysctl_init(net);
2973         if (rv < 0)
2974                 goto out_sysctl;
2975
2976         /* Initialize the per-net locks here */
2977         spin_lock_init(&net->xfrm.xfrm_state_lock);
2978         rwlock_init(&net->xfrm.xfrm_policy_lock);
2979         spin_lock_init(&net->xfrm.xfrm_policy_sk_bundle_lock);
2980         mutex_init(&net->xfrm.xfrm_cfg_mutex);
2981
2982         return 0;
2983
2984 out_sysctl:
2985         xfrm_policy_fini(net);
2986 out_policy:
2987         xfrm_state_fini(net);
2988 out_state:
2989         xfrm_statistics_fini(net);
2990 out_statistics:
2991         return rv;
2992 }
2993
/* pernet exit: tear down in exact reverse of xfrm_net_init()'s setup. */
static void __net_exit xfrm_net_exit(struct net *net)
{
	xfrm_sysctl_fini(net);
	xfrm_policy_fini(net);
	xfrm_state_fini(net);
	xfrm_statistics_fini(net);
}
3001
/* Per-network-namespace lifecycle hooks for the xfrm subsystem. */
static struct pernet_operations __net_initdata xfrm_net_ops = {
	.init = xfrm_net_init,
	.exit = xfrm_net_exit,
};
3006
/* Boot-time entry point: register the pernet ops and the input path. */
void __init xfrm_init(void)
{
	register_pernet_subsys(&xfrm_net_ops);
	xfrm_input_init();
}
3012
3013 #ifdef CONFIG_AUDITSYSCALL
/* Append the security context (if any) and the selector's addresses to an
 * in-progress audit record.  Prefix lengths are logged only when they
 * differ from a full host mask (32 for IPv4, 128 for IPv6).
 */
static void xfrm_audit_common_policyinfo(struct xfrm_policy *xp,
					 struct audit_buffer *audit_buf)
{
	struct xfrm_sec_ctx *ctx = xp->security;
	struct xfrm_selector *sel = &xp->selector;

	if (ctx)
		audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s",
				 ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str);

	switch (sel->family) {
	case AF_INET:
		audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4);
		if (sel->prefixlen_s != 32)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI4", &sel->daddr.a4);
		if (sel->prefixlen_d != 32)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	case AF_INET6:
		audit_log_format(audit_buf, " src=%pI6", sel->saddr.a6);
		if (sel->prefixlen_s != 128)
			audit_log_format(audit_buf, " src_prefixlen=%d",
					 sel->prefixlen_s);
		audit_log_format(audit_buf, " dst=%pI6", sel->daddr.a6);
		if (sel->prefixlen_d != 128)
			audit_log_format(audit_buf, " dst_prefixlen=%d",
					 sel->prefixlen_d);
		break;
	}
}
3047
3048 void xfrm_audit_policy_add(struct xfrm_policy *xp, int result,
3049                            kuid_t auid, unsigned int sessionid, u32 secid)
3050 {
3051         struct audit_buffer *audit_buf;
3052
3053         audit_buf = xfrm_audit_start("SPD-add");
3054         if (audit_buf == NULL)
3055                 return;
3056         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
3057         audit_log_format(audit_buf, " res=%u", result);
3058         xfrm_audit_common_policyinfo(xp, audit_buf);
3059         audit_log_end(audit_buf);
3060 }
3061 EXPORT_SYMBOL_GPL(xfrm_audit_policy_add);
3062
3063 void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result,
3064                               kuid_t auid, unsigned int sessionid, u32 secid)
3065 {
3066         struct audit_buffer *audit_buf;
3067
3068         audit_buf = xfrm_audit_start("SPD-delete");
3069         if (audit_buf == NULL)
3070                 return;
3071         xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf);
3072         audit_log_format(audit_buf, " res=%u", result);
3073         xfrm_audit_common_policyinfo(xp, audit_buf);
3074         audit_log_end(audit_buf);
3075 }
3076 EXPORT_SYMBOL_GPL(xfrm_audit_policy_delete);
3077 #endif
3078
3079 #ifdef CONFIG_XFRM_MIGRATE
3080 static bool xfrm_migrate_selector_match(const struct xfrm_selector *sel_cmp,
3081                                         const struct xfrm_selector *sel_tgt)
3082 {
3083         if (sel_cmp->proto == IPSEC_ULPROTO_ANY) {
3084                 if (sel_tgt->family == sel_cmp->family &&
3085                     xfrm_addr_equal(&sel_tgt->daddr, &sel_cmp->daddr,
3086                                     sel_cmp->family) &&
3087                     xfrm_addr_equal(&sel_tgt->saddr, &sel_cmp->saddr,
3088                                     sel_cmp->family) &&
3089                     sel_tgt->prefixlen_d == sel_cmp->prefixlen_d &&
3090                     sel_tgt->prefixlen_s == sel_cmp->prefixlen_s) {
3091                         return true;
3092                 }
3093         } else {
3094                 if (memcmp(sel_tgt, sel_cmp, sizeof(*sel_tgt)) == 0) {
3095                         return true;
3096                 }
3097         }
3098         return false;
3099 }
3100
/* Find the policy a migration request targets: first try the exact-match
 * hash chain for (daddr, saddr, family, dir); if nothing matches there,
 * scan the inexact list for a matching policy of strictly better
 * (numerically lower) priority.  Returns the policy with a reference
 * held, or NULL.
 */
static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel,
						    u8 dir, u8 type, struct net *net)
{
	struct xfrm_policy *pol, *ret = NULL;
	struct hlist_head *chain;
	u32 priority = ~0U;	/* worst possible, so any hash hit wins */

	read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/
	chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir);
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type) {
			ret = pol;
			priority = ret->priority;
			break;
		}
	}
	chain = &net->xfrm.policy_inexact[dir];
	hlist_for_each_entry(pol, chain, bydst) {
		if (xfrm_migrate_selector_match(sel, &pol->selector) &&
		    pol->type == type &&
		    pol->priority < priority) {
			ret = pol;
			break;
		}
	}

	if (ret)
		xfrm_pol_hold(ret);

	read_unlock_bh(&net->xfrm.xfrm_policy_lock);

	return ret;
}
3135
3136 static int migrate_tmpl_match(const struct xfrm_migrate *m, const struct xfrm_tmpl *t)
3137 {
3138         int match = 0;
3139
3140         if (t->mode == m->mode && t->id.proto == m->proto &&
3141             (m->reqid == 0 || t->reqid == m->reqid)) {
3142                 switch (t->mode) {
3143                 case XFRM_MODE_TUNNEL:
3144                 case XFRM_MODE_BEET:
3145                         if (xfrm_addr_equal(&t->id.daddr, &m->old_daddr,
3146                                             m->old_family) &&
3147                             xfrm_addr_equal(&t->saddr, &m->old_saddr,
3148                                             m->old_family)) {
3149                                 match = 1;
3150                         }
3151                         break;
3152                 case XFRM_MODE_TRANSPORT:
3153                         /* in case of transport mode, template does not store
3154                            any IP addresses, hence we just compare mode and
3155                            protocol */
3156                         match = 1;
3157                         break;
3158                 default:
3159                         break;
3160                 }
3161         }
3162         return match;
3163 }
3164
3165 /* update endpoint address(es) of template(s) */
/* update endpoint address(es) of template(s) */
static int xfrm_policy_migrate(struct xfrm_policy *pol,
			       struct xfrm_migrate *m, int num_migrate)
{
	struct xfrm_migrate *mp;
	int i, j, n = 0;	/* n counts templates that matched */

	write_lock_bh(&pol->lock);
	if (unlikely(pol->walk.dead)) {
		/* target policy has been deleted */
		write_unlock_bh(&pol->lock);
		return -ENOENT;
	}

	/* For every template in the policy, apply the first matching
	 * migrate entry.  Transport-mode templates count as migrated but
	 * carry no addresses to rewrite.
	 */
	for (i = 0; i < pol->xfrm_nr; i++) {
		for (j = 0, mp = m; j < num_migrate; j++, mp++) {
			if (!migrate_tmpl_match(mp, &pol->xfrm_vec[i]))
				continue;
			n++;
			if (pol->xfrm_vec[i].mode != XFRM_MODE_TUNNEL &&
			    pol->xfrm_vec[i].mode != XFRM_MODE_BEET)
				continue;
			/* update endpoints */
			memcpy(&pol->xfrm_vec[i].id.daddr, &mp->new_daddr,
			       sizeof(pol->xfrm_vec[i].id.daddr));
			memcpy(&pol->xfrm_vec[i].saddr, &mp->new_saddr,
			       sizeof(pol->xfrm_vec[i].saddr));
			pol->xfrm_vec[i].encap_family = mp->new_family;
			/* flush bundles: bumping genid invalidates every
			 * cached bundle built from this policy
			 */
			atomic_inc(&pol->genid);
		}
	}

	write_unlock_bh(&pol->lock);

	if (!n)
		return -ENODATA;

	return 0;
}
3205
3206 static int xfrm_migrate_check(const struct xfrm_migrate *m, int num_migrate)
3207 {
3208         int i, j;
3209
3210         if (num_migrate < 1 || num_migrate > XFRM_MAX_DEPTH)
3211                 return -EINVAL;
3212
3213         for (i = 0; i < num_migrate; i++) {
3214                 if (xfrm_addr_equal(&m[i].old_daddr, &m[i].new_daddr,
3215                                     m[i].old_family) &&
3216                     xfrm_addr_equal(&m[i].old_saddr, &m[i].new_saddr,
3217                                     m[i].old_family))
3218                         return -EINVAL;
3219                 if (xfrm_addr_any(&m[i].new_daddr, m[i].new_family) ||
3220                     xfrm_addr_any(&m[i].new_saddr, m[i].new_family))
3221                         return -EINVAL;
3222
3223                 /* check if there is any duplicated entry */
3224                 for (j = i + 1; j < num_migrate; j++) {
3225                         if (!memcmp(&m[i].old_daddr, &m[j].old_daddr,
3226                                     sizeof(m[i].old_daddr)) &&
3227                             !memcmp(&m[i].old_saddr, &m[j].old_saddr,
3228                                     sizeof(m[i].old_saddr)) &&
3229                             m[i].proto == m[j].proto &&
3230                             m[i].mode == m[j].mode &&
3231                             m[i].reqid == m[j].reqid &&
3232                             m[i].old_family == m[j].old_family)
3233                                 return -EINVAL;
3234                 }
3235         }
3236
3237         return 0;
3238 }
3239
3240 int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type,
3241                  struct xfrm_migrate *m, int num_migrate,
3242                  struct xfrm_kmaddress *k, struct net *net)
3243 {
3244         int i, err, nx_cur = 0, nx_new = 0;
3245         struct xfrm_policy *pol = NULL;
3246         struct xfrm_state *x, *xc;
3247         struct xfrm_state *x_cur[XFRM_MAX_DEPTH];
3248         struct xfrm_state *x_new[XFRM_MAX_DEPTH];
3249         struct xfrm_migrate *mp;
3250
3251         if ((err = xfrm_migrate_check(m, num_migrate)) < 0)
3252                 goto out;
3253
3254         /* Stage 1 - find policy */
3255         if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) {
3256                 err = -ENOENT;
3257                 goto out;
3258         }
3259
3260         /* Stage 2 - find and update state(s) */
3261         for (i = 0, mp = m; i < num_migrate; i++, mp++) {
3262                 if ((x = xfrm_migrate_state_find(mp, net))) {
3263                         x_cur[nx_cur] = x;
3264                         nx_cur++;
3265                         if ((xc = xfrm_state_migrate(x, mp))) {
3266                                 x_new[nx_new] = xc;
3267                                 nx_new++;
3268                         } else {
3269                                 err = -ENODATA;
3270                                 goto restore_state;
3271                         }
3272                 }
3273         }
3274
3275         /* Stage 3 - update policy */
3276         if ((err = xfrm_policy_migrate(pol, m, num_migrate)) < 0)
3277                 goto restore_state;
3278
3279         /* Stage 4 - delete old state(s) */
3280         if (nx_cur) {
3281                 xfrm_states_put(x_cur, nx_cur);
3282                 xfrm_states_delete(x_cur, nx_cur);
3283         }
3284
3285         /* Stage 5 - announce */
3286         km_migrate(sel, dir, type, m, num_migrate, k);
3287
3288         xfrm_pol_put(pol);
3289
3290         return 0;
3291 out:
3292         return err;
3293
3294 restore_state:
3295         if (pol)
3296                 xfrm_pol_put(pol);
3297         if (nx_cur)
3298                 xfrm_states_put(x_cur, nx_cur);
3299         if (nx_new)
3300                 xfrm_states_delete(x_new, nx_new);
3301
3302         return err;
3303 }
3304 EXPORT_SYMBOL(xfrm_migrate);
3305 #endif