Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/ebiederm...
[platform/adaptation/renesas_rcar/renesas_kernel.git] / net / sched / sch_teql.c
1 /* net/sched/sch_teql.c "True" (or "trivial") link equalizer.
2  *
3  *              This program is free software; you can redistribute it and/or
4  *              modify it under the terms of the GNU General Public License
5  *              as published by the Free Software Foundation; either version
6  *              2 of the License, or (at your option) any later version.
7  *
8  * Authors:     Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
9  */
10
11 #include <linux/module.h>
12 #include <linux/types.h>
13 #include <linux/kernel.h>
14 #include <linux/slab.h>
15 #include <linux/string.h>
16 #include <linux/errno.h>
17 #include <linux/if_arp.h>
18 #include <linux/netdevice.h>
19 #include <linux/init.h>
20 #include <linux/skbuff.h>
21 #include <linux/moduleparam.h>
22 #include <net/dst.h>
23 #include <net/neighbour.h>
24 #include <net/pkt_sched.h>
25
26 /*
27    How to set it up.
28    -----------------
29
30    After loading this module you will find a new device teqlN
31    and new qdisc with the same name. To join a slave to the equalizer
32    you should just set this qdisc on a device f.e.
33
34    # tc qdisc add dev eth0 root teql0
35    # tc qdisc add dev eth1 root teql0
36
37    That's all. Full PnP 8)
38
39    Applicability.
40    --------------
41
42    1. Slave devices MUST be active devices, i.e., they must raise the tbusy
43       signal and generate EOI events. If you want to equalize virtual devices
44       like tunnels, use a normal eql device.
45    2. This device puts no limitations on physical slave characteristics
46       f.e. it will equalize 9600baud line and 100Mb ethernet perfectly :-)
47       Certainly, large difference in link speeds will make the resulting
48       equalized link unusable, because of huge packet reordering.
49       I estimate an upper useful difference as ~10 times.
50    3. If the slave requires address resolution, only protocols using
51       neighbour cache (IPv4/IPv6) will work over the equalized link.
52       Other protocols are still allowed to use the slave device directly,
53       which will not break load balancing, though native slave
54       traffic will have the highest priority.  */
55
/* Per-equalizer state, stored as the private area of the teqlN net_device. */
struct teql_master {
        /* Must stay the first member: teql_qdisc_init() casts sch->ops
         * straight to a struct teql_master pointer.
         */
        struct Qdisc_ops qops;
        struct net_device *dev;         /* the teqlN master device */
        /* Entry point into the circular list of slave qdiscs (linked via
         * teql_sched_data.next); teql_master_xmit() starts its walk here.
         * NULL when no slave is attached.
         */
        struct Qdisc *slaves;
        struct list_head master_list;   /* linkage on master_dev_list */
        /* Counters reported by teql_master_stats64(). */
        unsigned long   tx_bytes;
        unsigned long   tx_packets;
        unsigned long   tx_errors;
        unsigned long   tx_dropped;
};
66
/* Private data of one teql qdisc instance, i.e. of one slave. */
struct teql_sched_data {
        struct Qdisc *next;             /* next slave in the master's circular list */
        struct teql_master *m;          /* owning master, set by teql_qdisc_init() */
        struct neighbour *ncache;       /* neighbour entry cached by __teql_resolve() */
        struct sk_buff_head q;          /* packets queued on this slave */
};
73
/* Successor of slave qdisc q in the circular slave list. Expands to an
 * lvalue, so it is also used to relink the list.
 */
#define NEXT_SLAVE(q) (((struct teql_sched_data *)qdisc_priv(q))->next)

/* Device flags that the master copies from / reconciles with its slaves. */
#define FMASK (IFF_BROADCAST | IFF_POINTOPOINT)
77
78 /* "teql*" qdisc routines */
79
80 static int
81 teql_enqueue(struct sk_buff *skb, struct Qdisc *sch)
82 {
83         struct net_device *dev = qdisc_dev(sch);
84         struct teql_sched_data *q = qdisc_priv(sch);
85
86         if (q->q.qlen < dev->tx_queue_len) {
87                 __skb_queue_tail(&q->q, skb);
88                 return NET_XMIT_SUCCESS;
89         }
90
91         return qdisc_drop(skb, sch);
92 }
93
94 static struct sk_buff *
95 teql_dequeue(struct Qdisc *sch)
96 {
97         struct teql_sched_data *dat = qdisc_priv(sch);
98         struct netdev_queue *dat_queue;
99         struct sk_buff *skb;
100
101         skb = __skb_dequeue(&dat->q);
102         dat_queue = netdev_get_tx_queue(dat->m->dev, 0);
103         if (skb == NULL) {
104                 struct net_device *m = qdisc_dev(dat_queue->qdisc);
105                 if (m) {
106                         dat->m->slaves = sch;
107                         netif_wake_queue(m);
108                 }
109         } else {
110                 qdisc_bstats_update(sch, skb);
111         }
112         sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen;
113         return skb;
114 }
115
/* Peek operation of the teql qdisc: never exposes a packet and always
 * returns NULL.
 */
static struct sk_buff *
teql_peek(struct Qdisc *sch)
{
        /* teql is meant to be used as root qdisc */
        return NULL;
}
122
/* NULL-tolerant wrapper around neigh_release(). */
static inline void
teql_neigh_release(struct neighbour *n)
{
        if (!n)
                return;

        neigh_release(n);
}
129
130 static void
131 teql_reset(struct Qdisc *sch)
132 {
133         struct teql_sched_data *dat = qdisc_priv(sch);
134
135         skb_queue_purge(&dat->q);
136         sch->q.qlen = 0;
137         teql_neigh_release(xchg(&dat->ncache, NULL));
138 }
139
140 static void
141 teql_destroy(struct Qdisc *sch)
142 {
143         struct Qdisc *q, *prev;
144         struct teql_sched_data *dat = qdisc_priv(sch);
145         struct teql_master *master = dat->m;
146
147         prev = master->slaves;
148         if (prev) {
149                 do {
150                         q = NEXT_SLAVE(prev);
151                         if (q == sch) {
152                                 NEXT_SLAVE(prev) = NEXT_SLAVE(q);
153                                 if (q == master->slaves) {
154                                         master->slaves = NEXT_SLAVE(q);
155                                         if (q == master->slaves) {
156                                                 struct netdev_queue *txq;
157                                                 spinlock_t *root_lock;
158
159                                                 txq = netdev_get_tx_queue(master->dev, 0);
160                                                 master->slaves = NULL;
161
162                                                 root_lock = qdisc_root_sleeping_lock(txq->qdisc);
163                                                 spin_lock_bh(root_lock);
164                                                 qdisc_reset(txq->qdisc);
165                                                 spin_unlock_bh(root_lock);
166                                         }
167                                 }
168                                 skb_queue_purge(&dat->q);
169                                 teql_neigh_release(xchg(&dat->ncache, NULL));
170                                 break;
171                         }
172
173                 } while ((prev = q) != master->slaves);
174         }
175 }
176
177 static int teql_qdisc_init(struct Qdisc *sch, struct nlattr *opt)
178 {
179         struct net_device *dev = qdisc_dev(sch);
180         struct teql_master *m = (struct teql_master *)sch->ops;
181         struct teql_sched_data *q = qdisc_priv(sch);
182
183         if (dev->hard_header_len > m->dev->hard_header_len)
184                 return -EINVAL;
185
186         if (m->dev == dev)
187                 return -ELOOP;
188
189         q->m = m;
190
191         skb_queue_head_init(&q->q);
192
193         if (m->slaves) {
194                 if (m->dev->flags & IFF_UP) {
195                         if ((m->dev->flags & IFF_POINTOPOINT &&
196                              !(dev->flags & IFF_POINTOPOINT)) ||
197                             (m->dev->flags & IFF_BROADCAST &&
198                              !(dev->flags & IFF_BROADCAST)) ||
199                             (m->dev->flags & IFF_MULTICAST &&
200                              !(dev->flags & IFF_MULTICAST)) ||
201                             dev->mtu < m->dev->mtu)
202                                 return -EINVAL;
203                 } else {
204                         if (!(dev->flags&IFF_POINTOPOINT))
205                                 m->dev->flags &= ~IFF_POINTOPOINT;
206                         if (!(dev->flags&IFF_BROADCAST))
207                                 m->dev->flags &= ~IFF_BROADCAST;
208                         if (!(dev->flags&IFF_MULTICAST))
209                                 m->dev->flags &= ~IFF_MULTICAST;
210                         if (dev->mtu < m->dev->mtu)
211                                 m->dev->mtu = dev->mtu;
212                 }
213                 q->next = NEXT_SLAVE(m->slaves);
214                 NEXT_SLAVE(m->slaves) = sch;
215         } else {
216                 q->next = sch;
217                 m->slaves = sch;
218                 m->dev->mtu = dev->mtu;
219                 m->dev->flags = (m->dev->flags&~FMASK)|(dev->flags&FMASK);
220         }
221         return 0;
222 }
223
224
225 static int
226 __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res,
227                struct net_device *dev, struct netdev_queue *txq,
228                struct neighbour *mn)
229 {
230         struct teql_sched_data *q = qdisc_priv(txq->qdisc);
231         struct neighbour *n = q->ncache;
232
233         if (mn->tbl == NULL)
234                 return -EINVAL;
235         if (n && n->tbl == mn->tbl &&
236             memcmp(n->primary_key, mn->primary_key, mn->tbl->key_len) == 0) {
237                 atomic_inc(&n->refcnt);
238         } else {
239                 n = __neigh_lookup_errno(mn->tbl, mn->primary_key, dev);
240                 if (IS_ERR(n))
241                         return PTR_ERR(n);
242         }
243         if (neigh_event_send(n, skb_res) == 0) {
244                 int err;
245                 char haddr[MAX_ADDR_LEN];
246
247                 neigh_ha_snapshot(haddr, n, dev);
248                 err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr,
249                                       NULL, skb->len);
250
251                 if (err < 0) {
252                         neigh_release(n);
253                         return -EINVAL;
254                 }
255                 teql_neigh_release(xchg(&q->ncache, n));
256                 return 0;
257         }
258         neigh_release(n);
259         return (skb_res == NULL) ? -EAGAIN : 1;
260 }
261
262 static inline int teql_resolve(struct sk_buff *skb,
263                                struct sk_buff *skb_res,
264                                struct net_device *dev,
265                                struct netdev_queue *txq)
266 {
267         struct dst_entry *dst = skb_dst(skb);
268         struct neighbour *mn;
269         int res;
270
271         if (txq->qdisc == &noop_qdisc)
272                 return -ENODEV;
273
274         if (!dev->header_ops || !dst)
275                 return 0;
276
277         rcu_read_lock();
278         mn = dst_get_neighbour_noref(dst);
279         res = mn ? __teql_resolve(skb, skb_res, dev, txq, mn) : 0;
280         rcu_read_unlock();
281
282         return res;
283 }
284
/* Transmit routine of the teqlN master device.
 *
 * Walks the circular slave list once, starting at master->slaves, and
 * tries to hand skb to the first slave that can take it. On success the
 * list entry point advances to the following slave, so consecutive packets
 * go to different slaves.
 *
 * If a pass ends with at least one slave failing address resolution and
 * no resolution skb was supplied yet, a second pass is made with skb
 * itself passed down as skb_res.
 *
 * Returns NETDEV_TX_OK when the packet was sent, was left with the
 * resolution path (teql_resolve() returned 1) or was dropped, and
 * NETDEV_TX_BUSY (after stopping the master queue) when some slave was
 * merely busy.
 */
static netdev_tx_t teql_master_xmit(struct sk_buff *skb, struct net_device *dev)
{
        struct teql_master *master = netdev_priv(dev);
        struct Qdisc *start, *q;
        int busy;       /* some slave could not take the packet right now */
        int nores;      /* some slave failed address resolution */
        int subq = skb_get_queue_mapping(skb);
        struct sk_buff *skb_res = NULL;

        start = master->slaves;

restart:
        nores = 0;
        busy = 0;

        q = start;
        if (!q)
                goto drop;      /* no slaves attached */

        do {
                struct net_device *slave = qdisc_dev(q);
                struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0);
                const struct net_device_ops *slave_ops = slave->netdev_ops;

                /* Skip slaves whose tx queue 0 no longer has this teql
                 * qdisc as its sleeping qdisc. Note that "continue" jumps
                 * to the loop condition, which advances q.
                 */
                if (slave_txq->qdisc_sleeping != q)
                        continue;
                if (netif_xmit_stopped(netdev_get_tx_queue(slave, subq)) ||
                    !netif_running(slave)) {
                        busy = 1;
                        continue;
                }

                switch (teql_resolve(skb, skb_res, slave, slave_txq)) {
                case 0:
                        /* Header is ready: transmit directly through the
                         * slave driver if its tx lock is free.
                         */
                        if (__netif_tx_trylock(slave_txq)) {
                                /* Sampled before the driver gets the skb. */
                                unsigned int length = qdisc_pkt_len(skb);

                                if (!netif_xmit_frozen_or_stopped(slave_txq) &&
                                    slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) {
                                        txq_trans_update(slave_txq);
                                        __netif_tx_unlock(slave_txq);
                                        master->slaves = NEXT_SLAVE(q);
                                        netif_wake_queue(dev);
                                        master->tx_packets++;
                                        master->tx_bytes += length;
                                        return NETDEV_TX_OK;
                                }
                                __netif_tx_unlock(slave_txq);
                        }
                        if (netif_xmit_stopped(netdev_get_tx_queue(dev, 0)))
                                busy = 1;
                        break;
                case 1:
                        /* Resolution is pending and skb was passed down as
                         * skb_res; it is neither sent nor freed here
                         * (presumably the neighbour layer now holds it -
                         * confirm against neigh_event_send()).
                         */
                        master->slaves = NEXT_SLAVE(q);
                        return NETDEV_TX_OK;
                default:
                        nores = 1;
                        break;
                }
                /* Strip everything in front of the network header again
                 * before trying the next slave (presumably the link-layer
                 * header pushed during resolution).
                 */
                __skb_pull(skb, skb_network_offset(skb));
        } while ((q = NEXT_SLAVE(q)) != start);

        /* Second pass: this time let the resolver have the skb itself. */
        if (nores && skb_res == NULL) {
                skb_res = skb;
                goto restart;
        }

        if (busy) {
                netif_stop_queue(dev);
                return NETDEV_TX_BUSY;
        }
        /* Not busy, yet nobody took the packet: count an error, then fall
         * through so it is also counted as dropped and freed.
         */
        master->tx_errors++;

drop:
        master->tx_dropped++;
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
}
363
364 static int teql_master_open(struct net_device *dev)
365 {
366         struct Qdisc *q;
367         struct teql_master *m = netdev_priv(dev);
368         int mtu = 0xFFFE;
369         unsigned int flags = IFF_NOARP | IFF_MULTICAST;
370
371         if (m->slaves == NULL)
372                 return -EUNATCH;
373
374         flags = FMASK;
375
376         q = m->slaves;
377         do {
378                 struct net_device *slave = qdisc_dev(q);
379
380                 if (slave == NULL)
381                         return -EUNATCH;
382
383                 if (slave->mtu < mtu)
384                         mtu = slave->mtu;
385                 if (slave->hard_header_len > LL_MAX_HEADER)
386                         return -EINVAL;
387
388                 /* If all the slaves are BROADCAST, master is BROADCAST
389                    If all the slaves are PtP, master is PtP
390                    Otherwise, master is NBMA.
391                  */
392                 if (!(slave->flags&IFF_POINTOPOINT))
393                         flags &= ~IFF_POINTOPOINT;
394                 if (!(slave->flags&IFF_BROADCAST))
395                         flags &= ~IFF_BROADCAST;
396                 if (!(slave->flags&IFF_MULTICAST))
397                         flags &= ~IFF_MULTICAST;
398         } while ((q = NEXT_SLAVE(q)) != m->slaves);
399
400         m->dev->mtu = mtu;
401         m->dev->flags = (m->dev->flags&~FMASK) | flags;
402         netif_start_queue(m->dev);
403         return 0;
404 }
405
/* Take the master device down: stops its transmit queue. Always returns 0. */
static int teql_master_close(struct net_device *dev)
{
        netif_stop_queue(dev);
        return 0;
}
411
412 static struct rtnl_link_stats64 *teql_master_stats64(struct net_device *dev,
413                                                      struct rtnl_link_stats64 *stats)
414 {
415         struct teql_master *m = netdev_priv(dev);
416
417         stats->tx_packets       = m->tx_packets;
418         stats->tx_bytes         = m->tx_bytes;
419         stats->tx_errors        = m->tx_errors;
420         stats->tx_dropped       = m->tx_dropped;
421         return stats;
422 }
423
424 static int teql_master_mtu(struct net_device *dev, int new_mtu)
425 {
426         struct teql_master *m = netdev_priv(dev);
427         struct Qdisc *q;
428
429         if (new_mtu < 68)
430                 return -EINVAL;
431
432         q = m->slaves;
433         if (q) {
434                 do {
435                         if (new_mtu > qdisc_dev(q)->mtu)
436                                 return -EINVAL;
437                 } while ((q = NEXT_SLAVE(q)) != m->slaves);
438         }
439
440         dev->mtu = new_mtu;
441         return 0;
442 }
443
/* net_device operations of the teqlN master device; installed by
 * teql_master_setup().
 */
static const struct net_device_ops teql_netdev_ops = {
        .ndo_open       = teql_master_open,
        .ndo_stop       = teql_master_close,
        .ndo_start_xmit = teql_master_xmit,
        .ndo_get_stats64 = teql_master_stats64,
        .ndo_change_mtu = teql_master_mtu,
};
451
452 static __init void teql_master_setup(struct net_device *dev)
453 {
454         struct teql_master *master = netdev_priv(dev);
455         struct Qdisc_ops *ops = &master->qops;
456
457         master->dev     = dev;
458         ops->priv_size  = sizeof(struct teql_sched_data);
459
460         ops->enqueue    =       teql_enqueue;
461         ops->dequeue    =       teql_dequeue;
462         ops->peek       =       teql_peek;
463         ops->init       =       teql_qdisc_init;
464         ops->reset      =       teql_reset;
465         ops->destroy    =       teql_destroy;
466         ops->owner      =       THIS_MODULE;
467
468         dev->netdev_ops =       &teql_netdev_ops;
469         dev->type               = ARPHRD_VOID;
470         dev->mtu                = 1500;
471         dev->tx_queue_len       = 100;
472         dev->flags              = IFF_NOARP;
473         dev->hard_header_len    = LL_MAX_HEADER;
474         dev->priv_flags         &= ~IFF_XMIT_DST_RELEASE;
475 }
476
/* All masters registered by teql_init(); walked by teql_exit(). */
static LIST_HEAD(master_dev_list);
/* Number of teqlN devices teql_init() tries to create. */
static int max_equalizers = 1;
module_param(max_equalizers, int, 0);
MODULE_PARM_DESC(max_equalizers, "Max number of link equalizers");
481
482 static int __init teql_init(void)
483 {
484         int i;
485         int err = -ENODEV;
486
487         for (i = 0; i < max_equalizers; i++) {
488                 struct net_device *dev;
489                 struct teql_master *master;
490
491                 dev = alloc_netdev(sizeof(struct teql_master),
492                                   "teql%d", teql_master_setup);
493                 if (!dev) {
494                         err = -ENOMEM;
495                         break;
496                 }
497
498                 if ((err = register_netdev(dev))) {
499                         free_netdev(dev);
500                         break;
501                 }
502
503                 master = netdev_priv(dev);
504
505                 strlcpy(master->qops.id, dev->name, IFNAMSIZ);
506                 err = register_qdisc(&master->qops);
507
508                 if (err) {
509                         unregister_netdev(dev);
510                         free_netdev(dev);
511                         break;
512                 }
513
514                 list_add_tail(&master->master_list, &master_dev_list);
515         }
516         return i ? 0 : err;
517 }
518
519 static void __exit teql_exit(void)
520 {
521         struct teql_master *master, *nxt;
522
523         list_for_each_entry_safe(master, nxt, &master_dev_list, master_list) {
524
525                 list_del(&master->master_list);
526
527                 unregister_qdisc(&master->qops);
528                 unregister_netdev(master->dev);
529                 free_netdev(master->dev);
530         }
531 }
532
/* Module entry and exit points. */
module_init(teql_init);
module_exit(teql_exit);

MODULE_LICENSE("GPL");