upload tizen1.0 source
[kernel/linux-2.6.36.git] / net / sched / cls_rsvp.h
1 /*
2  * net/sched/cls_rsvp.h Template file for RSVPv[46] classifiers.
3  *
4  *              This program is free software; you can redistribute it and/or
5  *              modify it under the terms of the GNU General Public License
6  *              as published by the Free Software Foundation; either version
7  *              2 of the License, or (at your option) any later version.
8  *
9  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
10  */
11
12 /*
13    Compared to the general packet classification problem,
14    RSVP needs only several relatively simple rules:
15
16    * (dst, protocol) are always specified,
17      so that we are able to hash them.
18    * src may be exact, or may be wildcard, so that
19      we can keep a hash table plus one wildcard entry.
20    * source port (or flow label) is important only if src is given.
21
22    IMPLEMENTATION.
23
24    We use a two level hash table: The top level is keyed by
25    destination address and protocol ID, every bucket contains a list
26    of "rsvp sessions", identified by destination address, protocol and
27    DPI(="Destination Port ID"): triple (key, mask, offset).
28
29    Every bucket has a smaller hash table keyed by source address
30    (cf. RSVP flowspec) and one wildcard entry for wildcard reservations.
31    Every bucket is again a list of "RSVP flows", selected by
32    source address and SPI(="Source Port ID" here rather than
33    "security parameter index"): triple (key, mask, offset).
34
35
36    NOTE 1. All the packets with IPv6 extension headers (but AH and ESP)
37    and all fragmented packets go to the best-effort traffic class.
38
39
40    NOTE 2. Two "port id"s seem to be redundant; RFC 2207 requires
41    only one "Generalized Port Identifier". So for classic
42    ah, esp (and udp, tcp) both *pi should coincide or one of them
43    should be wildcard.
44
45    At first sight, this redundancy is just a waste of CPU
46    resources. But DPI and SPI add the possibility to assign different
47    priorities to GPIs. Look also at note 4 about tunnels below.
48
49
50    NOTE 3. One complication is the case of tunneled packets.
51    We implement it as follows: if the first lookup
52    matches a special session with "tunnelhdr" value not zero,
53    flowid doesn't contain the true flow ID, but the tunnel ID (1...255).
54    In this case, we pull tunnelhdr bytes and restart lookup
55    with tunnel ID added to the list of keys. Simple and stupid 8)8)
56    It's enough for PIMREG and IPIP.
57
58
59    NOTE 4. Two GPIs make it possible to parse even GRE packets.
60    F.e. DPI can select ETH_P_IP (and necessary flags to make
61    tunnelhdr correct) in GRE protocol field and SPI matches
62    GRE key. Is it not nice? 8)8)
63
64
65    Well, as a result, despite its simplicity, we get a pretty
66    powerful classification engine.  */
67
68
69 struct rsvp_head
70 {
71         u32                     tmap[256/32];
72         u32                     hgenerator;
73         u8                      tgenerator;
74         struct rsvp_session     *ht[256];
75 };
76
77 struct rsvp_session
78 {
79         struct rsvp_session     *next;
80         __be32                  dst[RSVP_DST_LEN];
81         struct tc_rsvp_gpi      dpi;
82         u8                      protocol;
83         u8                      tunnelid;
84         /* 16 (src,sport) hash slots, and one wildcard source slot */
85         struct rsvp_filter      *ht[16+1];
86 };
87
88
89 struct rsvp_filter
90 {
91         struct rsvp_filter      *next;
92         __be32                  src[RSVP_DST_LEN];
93         struct tc_rsvp_gpi      spi;
94         u8                      tunnelhdr;
95
96         struct tcf_result       res;
97         struct tcf_exts         exts;
98
99         u32                     handle;
100         struct rsvp_session     *sess;
101 };
102
103 static __inline__ unsigned hash_dst(__be32 *dst, u8 protocol, u8 tunnelid)
104 {
105         unsigned h = (__force __u32)dst[RSVP_DST_LEN-1];
106         h ^= h>>16;
107         h ^= h>>8;
108         return (h ^ protocol ^ tunnelid) & 0xFF;
109 }
110
111 static __inline__ unsigned hash_src(__be32 *src)
112 {
113         unsigned h = (__force __u32)src[RSVP_DST_LEN-1];
114         h ^= h>>16;
115         h ^= h>>8;
116         h ^= h>>4;
117         return h & 0xF;
118 }
119
120 static struct tcf_ext_map rsvp_ext_map = {
121         .police = TCA_RSVP_POLICE,
122         .action = TCA_RSVP_ACT
123 };
124
/*
 * Run the extensions of filter `f` on `skb` (both, plus `res`, must be
 * in scope at the expansion site, which must sit inside a loop):
 *   result > 0  - return it from the enclosing function;
 *   result < 0  - `continue` the enclosing loop, i.e. try the next filter;
 *   result == 0 - fall through to the code after the macro.
 *
 * This is deliberately a bare brace block and not do { } while (0):
 * inside such a wrapper the `continue` would bind to the wrapper
 * instead of the caller's loop.
 */
#define RSVP_APPLY_RESULT()					\
{								\
	int verdict = tcf_exts_exec(skb, &f->exts, res);	\
	if (verdict > 0)					\
		return verdict;					\
	if (verdict < 0)					\
		continue;					\
}
133
134 static int rsvp_classify(struct sk_buff *skb, struct tcf_proto *tp,
135                          struct tcf_result *res)
136 {
137         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
138         struct rsvp_session *s;
139         struct rsvp_filter *f;
140         unsigned h1, h2;
141         __be32 *dst, *src;
142         u8 protocol;
143         u8 tunnelid = 0;
144         u8 *xprt;
145 #if RSVP_DST_LEN == 4
146         struct ipv6hdr *nhptr;
147
148         if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
149                 return -1;
150         nhptr = ipv6_hdr(skb);
151 #else
152         struct iphdr *nhptr;
153
154         if (!pskb_network_may_pull(skb, sizeof(*nhptr)))
155                 return -1;
156         nhptr = ip_hdr(skb);
157 #endif
158
159 restart:
160
161 #if RSVP_DST_LEN == 4
162         src = &nhptr->saddr.s6_addr32[0];
163         dst = &nhptr->daddr.s6_addr32[0];
164         protocol = nhptr->nexthdr;
165         xprt = ((u8*)nhptr) + sizeof(struct ipv6hdr);
166 #else
167         src = &nhptr->saddr;
168         dst = &nhptr->daddr;
169         protocol = nhptr->protocol;
170         xprt = ((u8*)nhptr) + (nhptr->ihl<<2);
171         if (nhptr->frag_off & htons(IP_MF|IP_OFFSET))
172                 return -1;
173 #endif
174
175         h1 = hash_dst(dst, protocol, tunnelid);
176         h2 = hash_src(src);
177
178         for (s = sht[h1]; s; s = s->next) {
179                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
180                     protocol == s->protocol &&
181                     !(s->dpi.mask &
182                       (*(u32*)(xprt+s->dpi.offset)^s->dpi.key)) &&
183 #if RSVP_DST_LEN == 4
184                     dst[0] == s->dst[0] &&
185                     dst[1] == s->dst[1] &&
186                     dst[2] == s->dst[2] &&
187 #endif
188                     tunnelid == s->tunnelid) {
189
190                         for (f = s->ht[h2]; f; f = f->next) {
191                                 if (src[RSVP_DST_LEN-1] == f->src[RSVP_DST_LEN-1] &&
192                                     !(f->spi.mask & (*(u32*)(xprt+f->spi.offset)^f->spi.key))
193 #if RSVP_DST_LEN == 4
194                                     &&
195                                     src[0] == f->src[0] &&
196                                     src[1] == f->src[1] &&
197                                     src[2] == f->src[2]
198 #endif
199                                     ) {
200                                         *res = f->res;
201                                         RSVP_APPLY_RESULT();
202
203 matched:
204                                         if (f->tunnelhdr == 0)
205                                                 return 0;
206
207                                         tunnelid = f->res.classid;
208                                         nhptr = (void*)(xprt + f->tunnelhdr - sizeof(*nhptr));
209                                         goto restart;
210                                 }
211                         }
212
213                         /* And wildcard bucket... */
214                         for (f = s->ht[16]; f; f = f->next) {
215                                 *res = f->res;
216                                 RSVP_APPLY_RESULT();
217                                 goto matched;
218                         }
219                         return -1;
220                 }
221         }
222         return -1;
223 }
224
225 static unsigned long rsvp_get(struct tcf_proto *tp, u32 handle)
226 {
227         struct rsvp_session **sht = ((struct rsvp_head*)tp->root)->ht;
228         struct rsvp_session *s;
229         struct rsvp_filter *f;
230         unsigned h1 = handle&0xFF;
231         unsigned h2 = (handle>>8)&0xFF;
232
233         if (h2 > 16)
234                 return 0;
235
236         for (s = sht[h1]; s; s = s->next) {
237                 for (f = s->ht[h2]; f; f = f->next) {
238                         if (f->handle == handle)
239                                 return (unsigned long)f;
240                 }
241         }
242         return 0;
243 }
244
/*
 * Counterpart of rsvp_get().  rsvp_get() takes no reference and
 * allocates nothing, so there is nothing to undo here.
 */
static void rsvp_put(struct tcf_proto *tp, unsigned long f)
{
}
248
249 static int rsvp_init(struct tcf_proto *tp)
250 {
251         struct rsvp_head *data;
252
253         data = kzalloc(sizeof(struct rsvp_head), GFP_KERNEL);
254         if (data) {
255                 tp->root = data;
256                 return 0;
257         }
258         return -ENOBUFS;
259 }
260
261 static inline void
262 rsvp_delete_filter(struct tcf_proto *tp, struct rsvp_filter *f)
263 {
264         tcf_unbind_filter(tp, &f->res);
265         tcf_exts_destroy(tp, &f->exts);
266         kfree(f);
267 }
268
269 static void rsvp_destroy(struct tcf_proto *tp)
270 {
271         struct rsvp_head *data = xchg(&tp->root, NULL);
272         struct rsvp_session **sht;
273         int h1, h2;
274
275         if (data == NULL)
276                 return;
277
278         sht = data->ht;
279
280         for (h1=0; h1<256; h1++) {
281                 struct rsvp_session *s;
282
283                 while ((s = sht[h1]) != NULL) {
284                         sht[h1] = s->next;
285
286                         for (h2=0; h2<=16; h2++) {
287                                 struct rsvp_filter *f;
288
289                                 while ((f = s->ht[h2]) != NULL) {
290                                         s->ht[h2] = f->next;
291                                         rsvp_delete_filter(tp, f);
292                                 }
293                         }
294                         kfree(s);
295                 }
296         }
297         kfree(data);
298 }
299
300 static int rsvp_delete(struct tcf_proto *tp, unsigned long arg)
301 {
302         struct rsvp_filter **fp, *f = (struct rsvp_filter*)arg;
303         unsigned h = f->handle;
304         struct rsvp_session **sp;
305         struct rsvp_session *s = f->sess;
306         int i;
307
308         for (fp = &s->ht[(h>>8)&0xFF]; *fp; fp = &(*fp)->next) {
309                 if (*fp == f) {
310                         tcf_tree_lock(tp);
311                         *fp = f->next;
312                         tcf_tree_unlock(tp);
313                         rsvp_delete_filter(tp, f);
314
315                         /* Strip tree */
316
317                         for (i=0; i<=16; i++)
318                                 if (s->ht[i])
319                                         return 0;
320
321                         /* OK, session has no flows */
322                         for (sp = &((struct rsvp_head*)tp->root)->ht[h&0xFF];
323                              *sp; sp = &(*sp)->next) {
324                                 if (*sp == s) {
325                                         tcf_tree_lock(tp);
326                                         *sp = s->next;
327                                         tcf_tree_unlock(tp);
328
329                                         kfree(s);
330                                         return 0;
331                                 }
332                         }
333
334                         return 0;
335                 }
336         }
337         return 0;
338 }
339
340 static unsigned gen_handle(struct tcf_proto *tp, unsigned salt)
341 {
342         struct rsvp_head *data = tp->root;
343         int i = 0xFFFF;
344
345         while (i-- > 0) {
346                 u32 h;
347                 if ((data->hgenerator += 0x10000) == 0)
348                         data->hgenerator = 0x10000;
349                 h = data->hgenerator|salt;
350                 if (rsvp_get(tp, h) == 0)
351                         return h;
352         }
353         return 0;
354 }
355
356 static int tunnel_bts(struct rsvp_head *data)
357 {
358         int n = data->tgenerator>>5;
359         u32 b = 1<<(data->tgenerator&0x1F);
360
361         if (data->tmap[n]&b)
362                 return 0;
363         data->tmap[n] |= b;
364         return 1;
365 }
366
367 static void tunnel_recycle(struct rsvp_head *data)
368 {
369         struct rsvp_session **sht = data->ht;
370         u32 tmap[256/32];
371         int h1, h2;
372
373         memset(tmap, 0, sizeof(tmap));
374
375         for (h1=0; h1<256; h1++) {
376                 struct rsvp_session *s;
377                 for (s = sht[h1]; s; s = s->next) {
378                         for (h2=0; h2<=16; h2++) {
379                                 struct rsvp_filter *f;
380
381                                 for (f = s->ht[h2]; f; f = f->next) {
382                                         if (f->tunnelhdr == 0)
383                                                 continue;
384                                         data->tgenerator = f->res.classid;
385                                         tunnel_bts(data);
386                                 }
387                         }
388                 }
389         }
390
391         memcpy(data->tmap, tmap, sizeof(tmap));
392 }
393
394 static u32 gen_tunnel(struct rsvp_head *data)
395 {
396         int i, k;
397
398         for (k=0; k<2; k++) {
399                 for (i=255; i>0; i--) {
400                         if (++data->tgenerator == 0)
401                                 data->tgenerator = 1;
402                         if (tunnel_bts(data))
403                                 return data->tgenerator;
404                 }
405                 tunnel_recycle(data);
406         }
407         return 0;
408 }
409
410 static const struct nla_policy rsvp_policy[TCA_RSVP_MAX + 1] = {
411         [TCA_RSVP_CLASSID]      = { .type = NLA_U32 },
412         [TCA_RSVP_DST]          = { .type = NLA_BINARY,
413                                     .len = RSVP_DST_LEN * sizeof(u32) },
414         [TCA_RSVP_SRC]          = { .type = NLA_BINARY,
415                                     .len = RSVP_DST_LEN * sizeof(u32) },
416         [TCA_RSVP_PINFO]        = { .len = sizeof(struct tc_rsvp_pinfo) },
417 };
418
419 static int rsvp_change(struct tcf_proto *tp, unsigned long base,
420                        u32 handle,
421                        struct nlattr **tca,
422                        unsigned long *arg)
423 {
424         struct rsvp_head *data = tp->root;
425         struct rsvp_filter *f, **fp;
426         struct rsvp_session *s, **sp;
427         struct tc_rsvp_pinfo *pinfo = NULL;
428         struct nlattr *opt = tca[TCA_OPTIONS-1];
429         struct nlattr *tb[TCA_RSVP_MAX + 1];
430         struct tcf_exts e;
431         unsigned h1, h2;
432         __be32 *dst;
433         int err;
434
435         if (opt == NULL)
436                 return handle ? -EINVAL : 0;
437
438         err = nla_parse_nested(tb, TCA_RSVP_MAX, opt, rsvp_policy);
439         if (err < 0)
440                 return err;
441
442         err = tcf_exts_validate(tp, tb, tca[TCA_RATE-1], &e, &rsvp_ext_map);
443         if (err < 0)
444                 return err;
445
446         if ((f = (struct rsvp_filter*)*arg) != NULL) {
447                 /* Node exists: adjust only classid */
448
449                 if (f->handle != handle && handle)
450                         goto errout2;
451                 if (tb[TCA_RSVP_CLASSID-1]) {
452                         f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
453                         tcf_bind_filter(tp, &f->res, base);
454                 }
455
456                 tcf_exts_change(tp, &f->exts, &e);
457                 return 0;
458         }
459
460         /* Now more serious part... */
461         err = -EINVAL;
462         if (handle)
463                 goto errout2;
464         if (tb[TCA_RSVP_DST-1] == NULL)
465                 goto errout2;
466
467         err = -ENOBUFS;
468         f = kzalloc(sizeof(struct rsvp_filter), GFP_KERNEL);
469         if (f == NULL)
470                 goto errout2;
471
472         h2 = 16;
473         if (tb[TCA_RSVP_SRC-1]) {
474                 memcpy(f->src, nla_data(tb[TCA_RSVP_SRC-1]), sizeof(f->src));
475                 h2 = hash_src(f->src);
476         }
477         if (tb[TCA_RSVP_PINFO-1]) {
478                 pinfo = nla_data(tb[TCA_RSVP_PINFO-1]);
479                 f->spi = pinfo->spi;
480                 f->tunnelhdr = pinfo->tunnelhdr;
481         }
482         if (tb[TCA_RSVP_CLASSID-1])
483                 f->res.classid = nla_get_u32(tb[TCA_RSVP_CLASSID-1]);
484
485         dst = nla_data(tb[TCA_RSVP_DST-1]);
486         h1 = hash_dst(dst, pinfo ? pinfo->protocol : 0, pinfo ? pinfo->tunnelid : 0);
487
488         err = -ENOMEM;
489         if ((f->handle = gen_handle(tp, h1 | (h2<<8))) == 0)
490                 goto errout;
491
492         if (f->tunnelhdr) {
493                 err = -EINVAL;
494                 if (f->res.classid > 255)
495                         goto errout;
496
497                 err = -ENOMEM;
498                 if (f->res.classid == 0 &&
499                     (f->res.classid = gen_tunnel(data)) == 0)
500                         goto errout;
501         }
502
503         for (sp = &data->ht[h1]; (s=*sp) != NULL; sp = &s->next) {
504                 if (dst[RSVP_DST_LEN-1] == s->dst[RSVP_DST_LEN-1] &&
505                     pinfo && pinfo->protocol == s->protocol &&
506                     memcmp(&pinfo->dpi, &s->dpi, sizeof(s->dpi)) == 0 &&
507 #if RSVP_DST_LEN == 4
508                     dst[0] == s->dst[0] &&
509                     dst[1] == s->dst[1] &&
510                     dst[2] == s->dst[2] &&
511 #endif
512                     pinfo->tunnelid == s->tunnelid) {
513
514 insert:
515                         /* OK, we found appropriate session */
516
517                         fp = &s->ht[h2];
518
519                         f->sess = s;
520                         if (f->tunnelhdr == 0)
521                                 tcf_bind_filter(tp, &f->res, base);
522
523                         tcf_exts_change(tp, &f->exts, &e);
524
525                         for (fp = &s->ht[h2]; *fp; fp = &(*fp)->next)
526                                 if (((*fp)->spi.mask&f->spi.mask) != f->spi.mask)
527                                         break;
528                         f->next = *fp;
529                         wmb();
530                         *fp = f;
531
532                         *arg = (unsigned long)f;
533                         return 0;
534                 }
535         }
536
537         /* No session found. Create new one. */
538
539         err = -ENOBUFS;
540         s = kzalloc(sizeof(struct rsvp_session), GFP_KERNEL);
541         if (s == NULL)
542                 goto errout;
543         memcpy(s->dst, dst, sizeof(s->dst));
544
545         if (pinfo) {
546                 s->dpi = pinfo->dpi;
547                 s->protocol = pinfo->protocol;
548                 s->tunnelid = pinfo->tunnelid;
549         }
550         for (sp = &data->ht[h1]; *sp; sp = &(*sp)->next) {
551                 if (((*sp)->dpi.mask&s->dpi.mask) != s->dpi.mask)
552                         break;
553         }
554         s->next = *sp;
555         wmb();
556         *sp = s;
557
558         goto insert;
559
560 errout:
561         kfree(f);
562 errout2:
563         tcf_exts_destroy(tp, &e);
564         return err;
565 }
566
567 static void rsvp_walk(struct tcf_proto *tp, struct tcf_walker *arg)
568 {
569         struct rsvp_head *head = tp->root;
570         unsigned h, h1;
571
572         if (arg->stop)
573                 return;
574
575         for (h = 0; h < 256; h++) {
576                 struct rsvp_session *s;
577
578                 for (s = head->ht[h]; s; s = s->next) {
579                         for (h1 = 0; h1 <= 16; h1++) {
580                                 struct rsvp_filter *f;
581
582                                 for (f = s->ht[h1]; f; f = f->next) {
583                                         if (arg->count < arg->skip) {
584                                                 arg->count++;
585                                                 continue;
586                                         }
587                                         if (arg->fn(tp, (unsigned long)f, arg) < 0) {
588                                                 arg->stop = 1;
589                                                 return;
590                                         }
591                                         arg->count++;
592                                 }
593                         }
594                 }
595         }
596 }
597
598 static int rsvp_dump(struct tcf_proto *tp, unsigned long fh,
599                      struct sk_buff *skb, struct tcmsg *t)
600 {
601         struct rsvp_filter *f = (struct rsvp_filter*)fh;
602         struct rsvp_session *s;
603         unsigned char *b = skb_tail_pointer(skb);
604         struct nlattr *nest;
605         struct tc_rsvp_pinfo pinfo;
606
607         if (f == NULL)
608                 return skb->len;
609         s = f->sess;
610
611         t->tcm_handle = f->handle;
612
613         nest = nla_nest_start(skb, TCA_OPTIONS);
614         if (nest == NULL)
615                 goto nla_put_failure;
616
617         NLA_PUT(skb, TCA_RSVP_DST, sizeof(s->dst), &s->dst);
618         pinfo.dpi = s->dpi;
619         pinfo.spi = f->spi;
620         pinfo.protocol = s->protocol;
621         pinfo.tunnelid = s->tunnelid;
622         pinfo.tunnelhdr = f->tunnelhdr;
623         pinfo.pad = 0;
624         NLA_PUT(skb, TCA_RSVP_PINFO, sizeof(pinfo), &pinfo);
625         if (f->res.classid)
626                 NLA_PUT_U32(skb, TCA_RSVP_CLASSID, f->res.classid);
627         if (((f->handle>>8)&0xFF) != 16)
628                 NLA_PUT(skb, TCA_RSVP_SRC, sizeof(f->src), f->src);
629
630         if (tcf_exts_dump(skb, &f->exts, &rsvp_ext_map) < 0)
631                 goto nla_put_failure;
632
633         nla_nest_end(skb, nest);
634
635         if (tcf_exts_dump_stats(skb, &f->exts, &rsvp_ext_map) < 0)
636                 goto nla_put_failure;
637         return skb->len;
638
639 nla_put_failure:
640         nlmsg_trim(skb, b);
641         return -1;
642 }
643
644 static struct tcf_proto_ops RSVP_OPS = {
645         .next           =       NULL,
646         .kind           =       RSVP_ID,
647         .classify       =       rsvp_classify,
648         .init           =       rsvp_init,
649         .destroy        =       rsvp_destroy,
650         .get            =       rsvp_get,
651         .put            =       rsvp_put,
652         .change         =       rsvp_change,
653         .delete         =       rsvp_delete,
654         .walk           =       rsvp_walk,
655         .dump           =       rsvp_dump,
656         .owner          =       THIS_MODULE,
657 };
658
659 static int __init init_rsvp(void)
660 {
661         return register_tcf_proto_ops(&RSVP_OPS);
662 }
663
664 static void __exit exit_rsvp(void)
665 {
666         unregister_tcf_proto_ops(&RSVP_OPS);
667 }
668
669 module_init(init_rsvp)
670 module_exit(exit_rsvp)