net/core/dev.c

   1 /*
   2  *      NET3    Protocol independent device support routines.
   3  *
   4  *              This program is free software; you can redistribute it and/or
   5  *              modify it under the terms of the GNU General Public License
   6  *              as published by the Free Software Foundation; either version
   7  *              2 of the License, or (at your option) any later version.
   8  *
   9  *      Derived from the non IP parts of dev.c 1.0.19
  10  *              Authors:        Ross Biro
  11  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *
  14  *      Additional Authors:
  15  *              Florian la Roche <rzsfl@rz.uni-sb.de>
  16  *              Alan Cox <gw4pts@gw4pts.ampr.org>
  17  *              David Hinds <dahinds@users.sourceforge.net>
  18  *              Alexey Kuznetsov <kuznet@ms2.inr.ac.ru>
  19  *              Adam Sulmicki <adam@cfar.umd.edu>
  20  *              Pekka Riikonen <priikone@poesidon.pspt.fi>
  21  *
  22  *      Changes:
  23  *              D.J. Barrow     :       Fixed bug where dev->refcnt gets set
  24  *                                      to 2 if register_netdev gets called
  25  *                                      before net_dev_init & also removed a
  26  *                                      few lines of code in the process.
  27  *              Alan Cox        :       device private ioctl copies fields back.
  28  *              Alan Cox        :       Transmit queue code does relevant
  29  *                                      stunts to keep the queue safe.
  30  *              Alan Cox        :       Fixed double lock.
  31  *              Alan Cox        :       Fixed promisc NULL pointer trap
  32  *              ????????        :       Support the full private ioctl range
  33  *              Alan Cox        :       Moved ioctl permission check into
  34  *                                      drivers
  35  *              Tim Kordas      :       SIOCADDMULTI/SIOCDELMULTI
  36  *              Alan Cox        :       100 backlog just doesn't cut it when
  37  *                                      you start doing multicast video 8)
  38  *              Alan Cox        :       Rewrote net_bh and list manager.
  39  *              Alan Cox        :       Fix ETH_P_ALL echoback lengths.
  40  *              Alan Cox        :       Took out transmit every packet pass
  41  *                                      Saved a few bytes in the ioctl handler
  42  *              Alan Cox        :       Network driver sets packet type before
  43  *                                      calling netif_rx. Saves a function
  44  *                                      call a packet.
  45  *              Alan Cox        :       Hashed net_bh()
  46  *              Richard Kooijman:       Timestamp fixes.
  47  *              Alan Cox        :       Wrong field in SIOCGIFDSTADDR
  48  *              Alan Cox        :       Device lock protection.
  49  *              Alan Cox        :       Fixed nasty side effect of device close
  50  *                                      changes.
  51  *              Rudi Cilibrasi  :       Pass the right thing to
  52  *                                      set_mac_address()
  53  *              Dave Miller     :       32bit quantity for the device lock to
  54  *                                      make it work out on a Sparc.
  55  *              Bjorn Ekwall    :       Added KERNELD hack.
  56  *              Alan Cox        :       Cleaned up the backlog initialise.
  57  *              Craig Metz      :       SIOCGIFCONF fix if space for under
  58  *                                      1 device.
  59  *          Thomas Bogendoerfer :       Return ENODEV for dev_open, if there
  60  *                                      is no device open function.
  61  *              Andi Kleen      :       Fix error reporting for SIOCGIFCONF
  62  *          Michael Chastain    :       Fix signed/unsigned for SIOCGIFCONF
  63  *              Cyrus Durgin    :       Cleaned for KMOD
  64  *              Adam Sulmicki   :       Bug Fix : Network Device Unload
  65  *                                      A network device unload needs to purge
  66  *                                      the backlog queue.
  67  *      Paul Rusty Russell      :       SIOCSIFNAME
  68  *              Pekka Riikonen  :       Netdev boot-time settings code
  69  *              Andrew Morton   :       Make unregister_netdevice wait
  70  *                                      indefinitely on dev->refcnt
  71  *              J Hadi Salim    :       - Backlog queue sampling
  72  *                                      - netif_rx() feedback
  73  */
  74
  75 #include <asm/uaccess.h>
  76 #include <asm/system.h>
  77 #include <linux/bitops.h>
  78 #include <linux/capability.h>
  79 #include <linux/cpu.h>
  80 #include <linux/types.h>
  81 #include <linux/kernel.h>
  82 #include <linux/sched.h>
  83 #include <linux/mutex.h>
  84 #include <linux/string.h>
  85 #include <linux/mm.h>
  86 #include <linux/socket.h>
  87 #include <linux/sockios.h>
  88 #include <linux/errno.h>
  89 #include <linux/interrupt.h>
  90 #include <linux/if_ether.h>
  91 #include <linux/netdevice.h>
  92 #include <linux/etherdevice.h>
  93 #include <linux/notifier.h>
  94 #include <linux/skbuff.h>
  95 #include <net/sock.h>
  96 #include <linux/rtnetlink.h>
  97 #include <linux/proc_fs.h>
  98 #include <linux/seq_file.h>
  99 #include <linux/stat.h>
 100 #include <linux/if_bridge.h>
 101 #include <linux/if_macvlan.h>
 102 #include <net/dst.h>
 103 #include <net/pkt_sched.h>
 104 #include <net/checksum.h>
 105 #include <linux/highmem.h>
 106 #include <linux/init.h>
 107 #include <linux/kmod.h>
 108 #include <linux/module.h>
 109 #include <linux/kallsyms.h>
 110 #include <linux/netpoll.h>
 111 #include <linux/rcupdate.h>
 112 #include <linux/delay.h>
 113 #include <net/wext.h>
 114 #include <net/iw_handler.h>
 115 #include <asm/current.h>
 116 #include <linux/audit.h>
 117 #include <linux/dmaengine.h>
 118 #include <linux/err.h>
 119 #include <linux/ctype.h>
 120 #include <linux/if_arp.h>
 121
 122 /*
 123  *      The list of packet types we will receive (as opposed to discard)
 124  *      and the routines to invoke.
 125  *
 126  *      Why 16. Because with 16 the only overlap we get on a hash of the
 127  *      low nibble of the protocol value is RARP/SNAP/X.25.
 128  *
 129  *      NOTE:  That is no longer true with the addition of VLAN tags.  Not
 130  *             sure which should go first, but I bet it won't make much
 131  *             difference if we are running VLANs.  The good news is that
 132  *             this protocol won't be in the list unless compiled in, so
 133  *             the average user (w/out VLANs) will not be adversely affected.
 134  *             --BLG
 135  *
 136  *              0800    IP
 137  *              8100    802.1Q VLAN
 138  *              0001    802.3
 139  *              0002    AX.25
 140  *              0004    802.2
 141  *              8035    RARP
 142  *              0005    SNAP
 143  *              0805    X.25
 144  *              0806    ARP
 145  *              8137    IPX
 146  *              0009    Localtalk
 147  *              86DD    IPv6
 148  */
 149
 150 static DEFINE_SPINLOCK(ptype_lock);
 151 static struct list_head ptype_base[16] __read_mostly;   /* 16 way hashed list */
 152 static struct list_head ptype_all __read_mostly;        /* Taps */
 153
 154 #ifdef CONFIG_NET_DMA
 155 struct net_dma {
 156         struct dma_client client;
 157         spinlock_t lock;
 158         cpumask_t channel_mask;
 159         struct dma_chan *channels[NR_CPUS];
 160 };
 161
 162 static enum dma_state_client
 163 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
 164         enum dma_state state);
 165
 166 static struct net_dma net_dma = {
 167         .client = {
 168                 .event_callback = netdev_dma_event,
 169         },
 170 };
 171 #endif
 172
 173 /*
 174  * The @dev_base_head list is protected by @dev_base_lock and the rtnl
 175  * semaphore.
 176  *
 177  * Pure readers hold dev_base_lock for reading.
 178  *
 179  * Writers must hold the rtnl semaphore while they loop through the
 180  * dev_base_head list, and hold dev_base_lock for writing when they do the
 181  * actual updates.  This allows pure readers to access the list even
 182  * while a writer is preparing to update it.
 183  *
 184  * To put it another way, dev_base_lock is held for writing only to
 185  * protect against pure readers; the rtnl semaphore provides the
 186  * protection against other writers.
 187  *
 188  * See, for example usages, register_netdevice() and
 189  * unregister_netdevice(), which must be called with the rtnl
 190  * semaphore held.
 191  */
 192 LIST_HEAD(dev_base_head);
 193 DEFINE_RWLOCK(dev_base_lock);
 194
 195 EXPORT_SYMBOL(dev_base_head);
 196 EXPORT_SYMBOL(dev_base_lock);
 197
 198 #define NETDEV_HASHBITS 8
 199 static struct hlist_head dev_name_head[1<<NETDEV_HASHBITS];
 200 static struct hlist_head dev_index_head[1<<NETDEV_HASHBITS];
 201
 202 static inline struct hlist_head *dev_name_hash(const char *name)
 203 {
 204         unsigned hash = full_name_hash(name, strnlen(name, IFNAMSIZ));
 205         return &dev_name_head[hash & ((1<<NETDEV_HASHBITS)-1)];
 206 }
 207
 208 static inline struct hlist_head *dev_index_hash(int ifindex)
 209 {
 210         return &dev_index_head[ifindex & ((1<<NETDEV_HASHBITS)-1)];
 211 }
 212
 213 /*
 214  *      Our notifier list
 215  */
 216
 217 static RAW_NOTIFIER_HEAD(netdev_chain);
 218
 219 /*
 220  *      Device drivers call our routines to queue packets here. We empty the
 221  *      queue in the local softnet handler.
 222  */
 223 DEFINE_PER_CPU(struct softnet_data, softnet_data) = { NULL };
 224
 225 #ifdef CONFIG_SYSFS
 226 extern int netdev_sysfs_init(void);
 227 extern int netdev_register_sysfs(struct net_device *);
 228 extern void netdev_unregister_sysfs(struct net_device *);
 229 #else
 230 #define netdev_sysfs_init()             (0)
 231 #define netdev_register_sysfs(dev)      (0)
 232 #define netdev_unregister_sysfs(dev)    do { } while(0)
 233 #endif
 234
 235 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 236 /*
 237  * register_netdevice() inits dev->_xmit_lock and sets lockdep class
 238  * according to dev->type
 239  */
 240 static const unsigned short netdev_lock_type[] =
 241         {ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
 242          ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
 243          ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
 244          ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
 245          ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
 246          ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
 247          ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
 248          ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
 249          ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
 250          ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
 251          ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
 252          ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
 253          ARPHRD_FCFABRIC, ARPHRD_IEEE802_TR, ARPHRD_IEEE80211,
 254          ARPHRD_IEEE80211_PRISM, ARPHRD_IEEE80211_RADIOTAP, ARPHRD_VOID,
 255          ARPHRD_NONE};
 256
 257 static const char *netdev_lock_name[] =
 258         {"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
 259          "_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
 260          "_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
 261          "_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
 262          "_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
 263          "_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
 264          "_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
 265          "_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
 266          "_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
 267          "_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
 268          "_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
 269          "_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
 270          "_xmit_FCFABRIC", "_xmit_IEEE802_TR", "_xmit_IEEE80211",
 271          "_xmit_IEEE80211_PRISM", "_xmit_IEEE80211_RADIOTAP", "_xmit_VOID",
 272          "_xmit_NONE"};
 273
 274 static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
 275
 276 static inline unsigned short netdev_lock_pos(unsigned short dev_type)
 277 {
 278         int i;
 279
 280         for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
 281                 if (netdev_lock_type[i] == dev_type)
 282                         return i;
 283         /* the last key is used by default */
 284         return ARRAY_SIZE(netdev_lock_type) - 1;
 285 }
 286
 287 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 288                                             unsigned short dev_type)
 289 {
 290         int i;
 291
 292         i = netdev_lock_pos(dev_type);
 293         lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
 294                                    netdev_lock_name[i]);
 295 }
 296 #else
 297 static inline void netdev_set_lockdep_class(spinlock_t *lock,
 298                                             unsigned short dev_type)
 299 {
 300 }
 301 #endif
 302
 303 /*******************************************************************************
 304
 305                 Protocol management and registration routines
 306
 307 *******************************************************************************/
 308
 309 /*
 310  *      Add a protocol ID to the list. Now that the input handler is
 311  *      smarter we can dispense with all the messy stuff that used to be
 312  *      here.
 313  *
 314  *      BEWARE!!! Protocol handlers, mangling input packets,
 315  *      MUST BE last in hash buckets and checking protocol handlers
 316  *      MUST start from promiscuous ptype_all chain in net_bh.
 317  *      It is true now, do not change it.
 318  *      Explanation follows: if protocol handler, mangling packet, will
 319  *      be the first on list, it is not able to sense, that packet
 320  *      is cloned and should be copied-on-write, so that it will
 321  *      change it and subsequent readers will get broken packet.
 322  *                                                      --ANK (980803)
 323  */
 324
 325 /**
 326  *      dev_add_pack - add packet handler
 327  *      @pt: packet type declaration
 328  *
 329  *      Add a protocol handler to the networking stack. The passed &packet_type
 330  *      is linked into kernel lists and may not be freed until it has been
 331  *      removed from the kernel lists.
 332  *
 333  *      This call does not sleep therefore it can not
 334  *      guarantee all CPU's that are in middle of receiving packets
 335  *      will see the new packet type (until the next received packet).
 336  */
 337
 338 void dev_add_pack(struct packet_type *pt)
 339 {
 340         int hash;
 341
 342         spin_lock_bh(&ptype_lock);
 343         if (pt->type == htons(ETH_P_ALL))
 344                 list_add_rcu(&pt->list, &ptype_all);
 345         else {
 346                 hash = ntohs(pt->type) & 15;
 347                 list_add_rcu(&pt->list, &ptype_base[hash]);
 348         }
 349         spin_unlock_bh(&ptype_lock);
 350 }
 351
 352 /**
 353  *      __dev_remove_pack        - remove packet handler
 354  *      @pt: packet type declaration
 355  *
 356  *      Remove a protocol handler that was previously added to the kernel
 357  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 358  *      from the kernel lists and can be freed or reused once this function
 359  *      returns.
 360  *
 361  *      The packet type might still be in use by receivers
 362  *      and must not be freed until after all the CPU's have gone
 363  *      through a quiescent state.
 364  */
 365 void __dev_remove_pack(struct packet_type *pt)
 366 {
 367         struct list_head *head;
 368         struct packet_type *pt1;
 369
 370         spin_lock_bh(&ptype_lock);
 371
 372         if (pt->type == htons(ETH_P_ALL))
 373                 head = &ptype_all;
 374         else
 375                 head = &ptype_base[ntohs(pt->type) & 15];
 376
 377         list_for_each_entry(pt1, head, list) {
 378                 if (pt == pt1) {
 379                         list_del_rcu(&pt->list);
 380                         goto out;
 381                 }
 382         }
 383
 384         printk(KERN_WARNING "dev_remove_pack: %p not found.\n", pt);
 385 out:
 386         spin_unlock_bh(&ptype_lock);
 387 }
 388 /**
 389  *      dev_remove_pack  - remove packet handler
 390  *      @pt: packet type declaration
 391  *
 392  *      Remove a protocol handler that was previously added to the kernel
 393  *      protocol handlers by dev_add_pack(). The passed &packet_type is removed
 394  *      from the kernel lists and can be freed or reused once this function
 395  *      returns.
 396  *
 397  *      This call sleeps to guarantee that no CPU is looking at the packet
 398  *      type after return.
 399  */
 400 void dev_remove_pack(struct packet_type *pt)
 401 {
 402         __dev_remove_pack(pt);
 403
 404         synchronize_net();
 405 }
 406
 407 /******************************************************************************
 408
 409                       Device Boot-time Settings Routines
 410
 411 *******************************************************************************/
 412
 413 /* Boot time configuration table */
 414 static struct netdev_boot_setup dev_boot_setup[NETDEV_BOOT_SETUP_MAX];
 415
 416 /**
 417  *      netdev_boot_setup_add   - add new setup entry
 418  *      @name: name of the device
 419  *      @map: configured settings for the device
 420  *
 421  *      Adds new setup entry to the dev_boot_setup list.  The function
 422  *      returns 0 on error and 1 on success.  This is a generic routine to
 423  *      all netdevices.
 424  */
 425 static int netdev_boot_setup_add(char *name, struct ifmap *map)
 426 {
 427         struct netdev_boot_setup *s;
 428         int i;
 429
 430         s = dev_boot_setup;
 431         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 432                 if (s[i].name[0] == '\0' || s[i].name[0] == ' ') {
 433                         memset(s[i].name, 0, sizeof(s[i].name));
 434                         strcpy(s[i].name, name);
 435                         memcpy(&s[i].map, map, sizeof(s[i].map));
 436                         break;
 437                 }
 438         }
 439
 440         return i >= NETDEV_BOOT_SETUP_MAX ? 0 : 1;
 441 }
 442
 443 /**
 444  *      netdev_boot_setup_check - check boot time settings
 445  *      @dev: the netdevice
 446  *
 447  *      Check boot time settings for the device.
 448  *      The found settings are set for the device to be used
 449  *      later in the device probing.
 450  *      Returns 0 if no settings found, 1 if they are.
 451  */
 452 int netdev_boot_setup_check(struct net_device *dev)
 453 {
 454         struct netdev_boot_setup *s = dev_boot_setup;
 455         int i;
 456
 457         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++) {
 458                 if (s[i].name[0] != '\0' && s[i].name[0] != ' ' &&
 459                     !strncmp(dev->name, s[i].name, strlen(s[i].name))) {
 460                         dev->irq        = s[i].map.irq;
 461                         dev->base_addr  = s[i].map.base_addr;
 462                         dev->mem_start  = s[i].map.mem_start;
 463                         dev->mem_end    = s[i].map.mem_end;
 464                         return 1;
 465                 }
 466         }
 467         return 0;
 468 }
 469
 470
 471 /**
 472  *      netdev_boot_base        - get address from boot time settings
 473  *      @prefix: prefix for network device
 474  *      @unit: id for network device
 475  *
 476  *      Check boot time settings for the base address of device.
 477  *      The found settings are set for the device to be used
 478  *      later in the device probing.
 479  *      Returns 0 if no settings found.
 480  */
 481 unsigned long netdev_boot_base(const char *prefix, int unit)
 482 {
 483         const struct netdev_boot_setup *s = dev_boot_setup;
 484         char name[IFNAMSIZ];
 485         int i;
 486
 487         sprintf(name, "%s%d", prefix, unit);
 488
 489         /*
 490          * If device already registered then return base of 1
 491          * to indicate not to probe for this interface
 492          */
 493         if (__dev_get_by_name(name))
 494                 return 1;
 495
 496         for (i = 0; i < NETDEV_BOOT_SETUP_MAX; i++)
 497                 if (!strcmp(name, s[i].name))
 498                         return s[i].map.base_addr;
 499         return 0;
 500 }
 501
 502 /*
 503  * Saves at boot time configured settings for any netdevice.
 504  */
 505 int __init netdev_boot_setup(char *str)
 506 {
 507         int ints[5];
 508         struct ifmap map;
 509
 510         str = get_options(str, ARRAY_SIZE(ints), ints);
 511         if (!str || !*str)
 512                 return 0;
 513
 514         /* Save settings */
 515         memset(&map, 0, sizeof(map));
 516         if (ints[0] > 0)
 517                 map.irq = ints[1];
 518         if (ints[0] > 1)
 519                 map.base_addr = ints[2];
 520         if (ints[0] > 2)
 521                 map.mem_start = ints[3];
 522         if (ints[0] > 3)
 523                 map.mem_end = ints[4];
 524
 525         /* Add new entry to the list */
 526         return netdev_boot_setup_add(str, &map);
 527 }
 528
 529 __setup("netdev=", netdev_boot_setup);
 530
 531 /*******************************************************************************
 532
 533                             Device Interface Subroutines
 534
 535 *******************************************************************************/
 536
 537 /**
 538  *      __dev_get_by_name       - find a device by its name
 539  *      @name: name to find
 540  *
 541  *      Find an interface by name. Must be called under RTNL semaphore
 542  *      or @dev_base_lock. If the name is found a pointer to the device
 543  *      is returned. If the name is not found then %NULL is returned. The
 544  *      reference counters are not incremented so the caller must be
 545  *      careful with locks.
 546  */
 547
 548 struct net_device *__dev_get_by_name(const char *name)
 549 {
 550         struct hlist_node *p;
 551
 552         hlist_for_each(p, dev_name_hash(name)) {
 553                 struct net_device *dev
 554                         = hlist_entry(p, struct net_device, name_hlist);
 555                 if (!strncmp(dev->name, name, IFNAMSIZ))
 556                         return dev;
 557         }
 558         return NULL;
 559 }
 560
 561 /**
 562  *      dev_get_by_name         - find a device by its name
 563  *      @name: name to find
 564  *
 565  *      Find an interface by name. This can be called from any
 566  *      context and does its own locking. The returned handle has
 567  *      the usage count incremented and the caller must use dev_put() to
 568  *      release it when it is no longer needed. %NULL is returned if no
 569  *      matching device is found.
 570  */
 571
 572 struct net_device *dev_get_by_name(const char *name)
 573 {
 574         struct net_device *dev;
 575
 576         read_lock(&dev_base_lock);
 577         dev = __dev_get_by_name(name);
 578         if (dev)
 579                 dev_hold(dev);
 580         read_unlock(&dev_base_lock);
 581         return dev;
 582 }
 583
 584 /**
 585  *      __dev_get_by_index - find a device by its ifindex
 586  *      @ifindex: index of device
 587  *
 588  *      Search for an interface by index. Returns %NULL if the device
 589  *      is not found or a pointer to the device. The device has not
 590  *      had its reference counter increased so the caller must be careful
 591  *      about locking. The caller must hold either the RTNL semaphore
 592  *      or @dev_base_lock.
 593  */
 594
 595 struct net_device *__dev_get_by_index(int ifindex)
 596 {
 597         struct hlist_node *p;
 598
 599         hlist_for_each(p, dev_index_hash(ifindex)) {
 600                 struct net_device *dev
 601                         = hlist_entry(p, struct net_device, index_hlist);
 602                 if (dev->ifindex == ifindex)
 603                         return dev;
 604         }
 605         return NULL;
 606 }
 607
 608
 609 /**
 610  *      dev_get_by_index - find a device by its ifindex
 611  *      @ifindex: index of device
 612  *
 613  *      Search for an interface by index. Returns NULL if the device
 614  *      is not found or a pointer to the device. The device returned has
 615  *      had a reference added and the pointer is safe until the user calls
 616  *      dev_put to indicate they have finished with it.
 617  */
 618
 619 struct net_device *dev_get_by_index(int ifindex)
 620 {
 621         struct net_device *dev;
 622
 623         read_lock(&dev_base_lock);
 624         dev = __dev_get_by_index(ifindex);
 625         if (dev)
 626                 dev_hold(dev);
 627         read_unlock(&dev_base_lock);
 628         return dev;
 629 }
 630
 631 /**
 632  *      dev_getbyhwaddr - find a device by its hardware address
 633  *      @type: media type of device
 634  *      @ha: hardware address
 635  *
 636  *      Search for an interface by MAC address. Returns NULL if the device
 637  *      is not found or a pointer to the device. The caller must hold the
 638  *      rtnl semaphore. The returned device has not had its ref count increased
 639  *      and the caller must therefore be careful about locking
 640  *
 641  *      BUGS:
 642  *      If the API was consistent this would be __dev_get_by_hwaddr
 643  */
 644
 645 struct net_device *dev_getbyhwaddr(unsigned short type, char *ha)
 646 {
 647         struct net_device *dev;
 648
 649         ASSERT_RTNL();
 650
 651         for_each_netdev(dev)
 652                 if (dev->type == type &&
 653                     !memcmp(dev->dev_addr, ha, dev->addr_len))
 654                         return dev;
 655
 656         return NULL;
 657 }
 658
 659 EXPORT_SYMBOL(dev_getbyhwaddr);
 660
 661 struct net_device *__dev_getfirstbyhwtype(unsigned short type)
 662 {
 663         struct net_device *dev;
 664
 665         ASSERT_RTNL();
 666         for_each_netdev(dev)
 667                 if (dev->type == type)
 668                         return dev;
 669
 670         return NULL;
 671 }
 672
 673 EXPORT_SYMBOL(__dev_getfirstbyhwtype);
 674
 675 struct net_device *dev_getfirstbyhwtype(unsigned short type)
 676 {
 677         struct net_device *dev;
 678
 679         rtnl_lock();
 680         dev = __dev_getfirstbyhwtype(type);
 681         if (dev)
 682                 dev_hold(dev);
 683         rtnl_unlock();
 684         return dev;
 685 }
 686
 687 EXPORT_SYMBOL(dev_getfirstbyhwtype);
 688
 689 /**
 690  *      dev_get_by_flags - find any device with given flags
 691  *      @if_flags: IFF_* values
 692  *      @mask: bitmask of bits in if_flags to check
 693  *
 694  *      Search for any interface with the given flags. Returns NULL if a device
 695  *      is not found or a pointer to the device. The device returned has
 696  *      had a reference added and the pointer is safe until the user calls
 697  *      dev_put to indicate they have finished with it.
 698  */
 699
 700 struct net_device * dev_get_by_flags(unsigned short if_flags, unsigned short mask)
 701 {
 702         struct net_device *dev, *ret;
 703
 704         ret = NULL;
 705         read_lock(&dev_base_lock);
 706         for_each_netdev(dev) {
 707                 if (((dev->flags ^ if_flags) & mask) == 0) {
 708                         dev_hold(dev);
 709                         ret = dev;
 710                         break;
 711                 }
 712         }
 713         read_unlock(&dev_base_lock);
 714         return ret;
 715 }
 716
 717 /**
 718  *      dev_valid_name - check if name is okay for network device
 719  *      @name: name string
 720  *
 721  *      Network device names need to be valid file names to
 722  *      to allow sysfs to work.  We also disallow any kind of
 723  *      whitespace.
 724  */
 725 int dev_valid_name(const char *name)
 726 {
 727         if (*name == '\0')
 728                 return 0;
 729         if (strlen(name) >= IFNAMSIZ)
 730                 return 0;
 731         if (!strcmp(name, ".") || !strcmp(name, ".."))
 732                 return 0;
 733
 734         while (*name) {
 735                 if (*name == '/' || isspace(*name))
 736                         return 0;
 737                 name++;
 738         }
 739         return 1;
 740 }
 741
 742 /**
 743  *      dev_alloc_name - allocate a name for a device
 744  *      @dev: device
 745  *      @name: name format string
 746  *
 747  *      Passed a format string - eg "lt%d" it will try and find a suitable
 748  *      id. It scans list of devices to build up a free map, then chooses
 749  *      the first empty slot. The caller must hold the dev_base or rtnl lock
 750  *      while allocating the name and adding the device in order to avoid
 751  *      duplicates.
 752  *      Limited to bits_per_byte * page size devices (ie 32K on most platforms).
 753  *      Returns the number of the unit assigned or a negative errno code.
 754  */
 755
 756 int dev_alloc_name(struct net_device *dev, const char *name)
 757 {
 758         int i = 0;
 759         char buf[IFNAMSIZ];
 760         const char *p;
 761         const int max_netdevices = 8*PAGE_SIZE;
 762         long *inuse;
 763         struct net_device *d;
 764
 765         p = strnchr(name, IFNAMSIZ-1, '%');
 766         if (p) {
 767                 /*
 768                  * Verify the string as this thing may have come from
 769                  * the user.  There must be either one "%d" and no other "%"
 770                  * characters.
 771                  */
 772                 if (p[1] != 'd' || strchr(p + 2, '%'))
 773                         return -EINVAL;
 774
 775                 /* Use one page as a bit array of possible slots */
 776                 inuse = (long *) get_zeroed_page(GFP_ATOMIC);
 777                 if (!inuse)
 778                         return -ENOMEM;
 779
 780                 for_each_netdev(d) {
 781                         if (!sscanf(d->name, name, &i))
 782                                 continue;
 783                         if (i < 0 || i >= max_netdevices)
 784                                 continue;
 785
 786                         /*  avoid cases where sscanf is not exact inverse of printf */
 787                         snprintf(buf, sizeof(buf), name, i);
 788                         if (!strncmp(buf, d->name, IFNAMSIZ))
 789                                 set_bit(i, inuse);
 790                 }
 791
 792                 i = find_first_zero_bit(inuse, max_netdevices);
 793                 free_page((unsigned long) inuse);
 794         }
 795
 796         snprintf(buf, sizeof(buf), name, i);
 797         if (!__dev_get_by_name(buf)) {
 798                 strlcpy(dev->name, buf, IFNAMSIZ);
 799                 return i;
 800         }
 801
 802         /* It is possible to run out of possible slots
 803          * when the name is long and there isn't enough space left
 804          * for the digits, or if all bits are used.
 805          */
 806         return -ENFILE;
 807 }
 808
 809
 810 /**
 811  *      dev_change_name - change name of a device
 812  *      @dev: device
 813  *      @newname: name (or format string) must be at least IFNAMSIZ
 814  *
 815  *      Change name of a device, can pass format strings "eth%d".
 816  *      for wildcarding.
 817  */
 818 int dev_change_name(struct net_device *dev, char *newname)
 819 {
 820         char oldname[IFNAMSIZ];
 821         int err = 0;
 822         int ret;
 823
 824         ASSERT_RTNL();
 825
 826         if (dev->flags & IFF_UP)
 827                 return -EBUSY;
 828
 829         if (!dev_valid_name(newname))
 830                 return -EINVAL;
 831
 832         memcpy(oldname, dev->name, IFNAMSIZ);
 833
 834         if (strchr(newname, '%')) {
 835                 err = dev_alloc_name(dev, newname);
 836                 if (err < 0)
 837                         return err;
 838                 strcpy(newname, dev->name);
 839         }
 840         else if (__dev_get_by_name(newname))
 841                 return -EEXIST;
 842         else
 843                 strlcpy(dev->name, newname, IFNAMSIZ);
 844
 845 rollback:
 846         device_rename(&dev->dev, dev->name);
 847
 848         write_lock_bh(&dev_base_lock);
 849         hlist_del(&dev->name_hlist);
 850         hlist_add_head(&dev->name_hlist, dev_name_hash(dev->name));
 851         write_unlock_bh(&dev_base_lock);
 852
 853         ret = raw_notifier_call_chain(&netdev_chain, NETDEV_CHANGENAME, dev);
 854         ret = notifier_to_errno(ret);
 855
 856         if (ret) {
 857                 if (err) {
 858                         printk(KERN_ERR
 859                                "%s: name change rollback failed: %d.\n",
 860                                dev->name, ret);
 861                 } else {
 862                         err = ret;
 863                         memcpy(dev->name, oldname, IFNAMSIZ);
 864                         goto rollback;
 865                 }
 866         }
 867
 868         return err;
 869 }
 870
 871 /**
 872  *      netdev_features_change - device changes features
 873  *      @dev: device to cause notification
 874  *
 875  *      Called to indicate a device has changed features.
 876  */
 877 void netdev_features_change(struct net_device *dev)
 878 {
 879         raw_notifier_call_chain(&netdev_chain, NETDEV_FEAT_CHANGE, dev);
 880 }
 881 EXPORT_SYMBOL(netdev_features_change);
 882
 883 /**
 884  *      netdev_state_change - device changes state
 885  *      @dev: device to cause notification
 886  *
 887  *      Called to indicate a device has changed state. This function calls
 888  *      the notifier chains for netdev_chain and sends a NEWLINK message
 889  *      to the routing socket.
 890  */
 891 void netdev_state_change(struct net_device *dev)
 892 {
 893         if (dev->flags & IFF_UP) {
 894                 raw_notifier_call_chain(&netdev_chain,
 895                                 NETDEV_CHANGE, dev);
 896                 rtmsg_ifinfo(RTM_NEWLINK, dev, 0);
 897         }
 898 }
 899
 900 /**
 901  *      dev_load        - load a network module
 902  *      @name: name of interface
 903  *
 904  *      If a network interface is not present and the process has suitable
 905  *      privileges this function loads the module. If module loading is not
 906  *      available in this kernel then it becomes a nop.
 907  */
 908
 909 void dev_load(const char *name)
 910 {
 911         struct net_device *dev;
 912
 913         read_lock(&dev_base_lock);
 914         dev = __dev_get_by_name(name);
 915         read_unlock(&dev_base_lock);
 916
 917         if (!dev && capable(CAP_SYS_MODULE))
 918                 request_module("%s", name);
 919 }
 920
 921 static int default_rebuild_header(struct sk_buff *skb)
 922 {
 923         printk(KERN_DEBUG "%s: default_rebuild_header called -- BUG!\n",
 924                skb->dev ? skb->dev->name : "NULL!!!");
 925         kfree_skb(skb);
 926         return 1;
 927 }
 928
 929 /**
 930  *      dev_open        - prepare an interface for use.
 931  *      @dev:   device to open
 932  *
 933  *      Takes a device from down to up state. The device's private open
 934  *      function is invoked and then the multicast lists are loaded. Finally
 935  *      the device is moved into the up state and a %NETDEV_UP message is
 936  *      sent to the netdev notifier chain.
 937  *
 938  *      Calling this function on an active interface is a nop. On a failure
 939  *      a negative errno code is returned.
 940  */
 941 int dev_open(struct net_device *dev)
 942 {
 943         int ret = 0;
 944
 945         /*
 946          *      Is it already up?
 947          */
 948
 949         if (dev->flags & IFF_UP)
 950                 return 0;
 951
 952         /*
 953          *      Is it even present?
 954          */
 955         if (!netif_device_present(dev))
 956                 return -ENODEV;
 957
 958         /*
 959          *      Call device private open method
 960          */
 961         set_bit(__LINK_STATE_START, &dev->state);
 962         if (dev->open) {
 963                 ret = dev->open(dev);
 964                 if (ret)
 965                         clear_bit(__LINK_STATE_START, &dev->state);
 966         }
 967
 968         /*
 969          *      If it went open OK then:
 970          */
 971
 972         if (!ret) {
 973                 /*
 974                  *      Set the flags.
 975                  */
 976                 dev->flags |= IFF_UP;
 977
 978                 /*
 979                  *      Initialize multicasting status
 980                  */
 981                 dev_set_rx_mode(dev);
 982
 983                 /*
 984                  *      Wakeup transmit queue engine
 985                  */
 986                 dev_activate(dev);
 987
 988                 /*
 989                  *      ... and announce new interface.
 990                  */
 991                 raw_notifier_call_chain(&netdev_chain, NETDEV_UP, dev);
 992         }
 993         return ret;
 994 }
 995
 996 /**
 997  *      dev_close - shutdown an interface.
 998  *      @dev: device to shutdown
 999  *
1000  *      This function moves an active device into down state. A
1001  *      %NETDEV_GOING_DOWN is sent to the netdev notifier chain. The device
1002  *      is then deactivated and finally a %NETDEV_DOWN is sent to the notifier
1003  *      chain.
1004  */
1005 int dev_close(struct net_device *dev)
1006 {
1007         if (!(dev->flags & IFF_UP))
1008                 return 0;
1009
1010         /*
1011          *      Tell people we are going down, so that they can
1012          *      prepare to death, when device is still operating.
1013          */
1014         raw_notifier_call_chain(&netdev_chain, NETDEV_GOING_DOWN, dev);
1015
1016         dev_deactivate(dev);
1017
1018         clear_bit(__LINK_STATE_START, &dev->state);
1019
1020         /* Synchronize to scheduled poll. We cannot touch poll list,
1021          * it can be even on different cpu. So just clear netif_running(),
1022          * and wait when poll really will happen. Actually, the best place
1023          * for this is inside dev->stop() after device stopped its irq
1024          * engine, but this requires more changes in devices. */
1025
1026         smp_mb__after_clear_bit(); /* Commit netif_running(). */
1027         while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) {
1028                 /* No hurry. */
1029                 msleep(1);
1030         }
1031
1032         /*
1033          *      Call the device specific close. This cannot fail.
1034          *      Only if device is UP
1035          *
1036          *      We allow it to be called even after a DETACH hot-plug
1037          *      event.
1038          */
1039         if (dev->stop)
1040                 dev->stop(dev);
1041
1042         /*
1043          *      Device is now down.
1044          */
1045
1046         dev->flags &= ~IFF_UP;
1047
1048         /*
1049          * Tell people we are down
1050          */
1051         raw_notifier_call_chain(&netdev_chain, NETDEV_DOWN, dev);
1052
1053         return 0;
1054 }
1055
1056
1057 /*
1058  *      Device change register/unregister. These are not inline or static
1059  *      as we export them to the world.
1060  */
1061
1062 /**
1063  *      register_netdevice_notifier - register a network notifier block
1064  *      @nb: notifier
1065  *
1066  *      Register a notifier to be called when network device events occur.
1067  *      The notifier passed is linked into the kernel structures and must
1068  *      not be reused until it has been unregistered. A negative errno code
1069  *      is returned on a failure.
1070  *
1071  *      When registered all registration and up events are replayed
1072  *      to the new notifier to allow device to have a race free
1073  *      view of the network device list.
1074  */
1075
1076 int register_netdevice_notifier(struct notifier_block *nb)
1077 {
1078         struct net_device *dev;
1079         struct net_device *last;
1080         int err;
1081
1082         rtnl_lock();
1083         err = raw_notifier_chain_register(&netdev_chain, nb);
1084         if (err)
1085                 goto unlock;
1086
1087         for_each_netdev(dev) {
1088                 err = nb->notifier_call(nb, NETDEV_REGISTER, dev);
1089                 err = notifier_to_errno(err);
1090                 if (err)
1091                         goto rollback;
1092
1093                 if (!(dev->flags & IFF_UP))
1094                         continue;
1095
1096                 nb->notifier_call(nb, NETDEV_UP, dev);
1097         }
1098
1099 unlock:
1100         rtnl_unlock();
1101         return err;
1102
1103 rollback:
1104         last = dev;
1105         for_each_netdev(dev) {
1106                 if (dev == last)
1107                         break;
1108
1109                 if (dev->flags & IFF_UP) {
1110                         nb->notifier_call(nb, NETDEV_GOING_DOWN, dev);
1111                         nb->notifier_call(nb, NETDEV_DOWN, dev);
1112                 }
1113                 nb->notifier_call(nb, NETDEV_UNREGISTER, dev);
1114         }
1115         goto unlock;
1116 }
1117
1118 /**
1119  *      unregister_netdevice_notifier - unregister a network notifier block
1120  *      @nb: notifier
1121  *
1122  *      Unregister a notifier previously registered by
1123  *      register_netdevice_notifier(). The notifier is unlinked into the
1124  *      kernel structures and may then be reused. A negative errno code
1125  *      is returned on a failure.
1126  */
1127
1128 int unregister_netdevice_notifier(struct notifier_block *nb)
1129 {
1130         int err;
1131
1132         rtnl_lock();
1133         err = raw_notifier_chain_unregister(&netdev_chain, nb);
1134         rtnl_unlock();
1135         return err;
1136 }
1137
1138 /**
1139  *      call_netdevice_notifiers - call all network notifier blocks
1140  *      @val: value passed unmodified to notifier function
1141  *      @v:   pointer passed unmodified to notifier function
1142  *
1143  *      Call all network notifier blocks.  Parameters and return value
1144  *      are as for raw_notifier_call_chain().
1145  */
1146
1147 int call_netdevice_notifiers(unsigned long val, void *v)
1148 {
1149         return raw_notifier_call_chain(&netdev_chain, val, v);
1150 }
1151
1152 /* When > 0 there are consumers of rx skb time stamps */
1153 static atomic_t netstamp_needed = ATOMIC_INIT(0);
1154
1155 void net_enable_timestamp(void)
1156 {
1157         atomic_inc(&netstamp_needed);
1158 }
1159
1160 void net_disable_timestamp(void)
1161 {
1162         atomic_dec(&netstamp_needed);
1163 }
1164
1165 static inline void net_timestamp(struct sk_buff *skb)
1166 {
1167         if (atomic_read(&netstamp_needed))
1168                 __net_timestamp(skb);
1169         else
1170                 skb->tstamp.tv64 = 0;
1171 }
1172
1173 /*
1174  *      Support routine. Sends outgoing frames to any network
1175  *      taps currently in use.
1176  */
1177
1178 static void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev)
1179 {
1180         struct packet_type *ptype;
1181
1182         net_timestamp(skb);
1183
1184         rcu_read_lock();
1185         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1186                 /* Never send packets back to the socket
1187                  * they originated from - MvS (miquels@drinkel.ow.org)
1188                  */
1189                 if ((ptype->dev == dev || !ptype->dev) &&
1190                     (ptype->af_packet_priv == NULL ||
1191                      (struct sock *)ptype->af_packet_priv != skb->sk)) {
1192                         struct sk_buff *skb2= skb_clone(skb, GFP_ATOMIC);
1193                         if (!skb2)
1194                                 break;
1195
1196                         /* skb->nh should be correctly
1197                            set by sender, so that the second statement is
1198                            just protection against buggy protocols.
1199                          */
1200                         skb_reset_mac_header(skb2);
1201
1202                         if (skb_network_header(skb2) < skb2->data ||
1203                             skb2->network_header > skb2->tail) {
1204                                 if (net_ratelimit())
1205                                         printk(KERN_CRIT "protocol %04x is "
1206                                                "buggy, dev %s\n",
1207                                                skb2->protocol, dev->name);
1208                                 skb_reset_network_header(skb2);
1209                         }
1210
1211                         skb2->transport_header = skb2->network_header;
1212                         skb2->pkt_type = PACKET_OUTGOING;
1213                         ptype->func(skb2, skb->dev, ptype, skb->dev);
1214                 }
1215         }
1216         rcu_read_unlock();
1217 }
1218
1219
1220 void __netif_schedule(struct net_device *dev)
1221 {
1222         if (!test_and_set_bit(__LINK_STATE_SCHED, &dev->state)) {
1223                 unsigned long flags;
1224                 struct softnet_data *sd;
1225
1226                 local_irq_save(flags);
1227                 sd = &__get_cpu_var(softnet_data);
1228                 dev->next_sched = sd->output_queue;
1229                 sd->output_queue = dev;
1230                 raise_softirq_irqoff(NET_TX_SOFTIRQ);
1231                 local_irq_restore(flags);
1232         }
1233 }
1234 EXPORT_SYMBOL(__netif_schedule);
1235
1236 void __netif_rx_schedule(struct net_device *dev)
1237 {
1238         unsigned long flags;
1239
1240         local_irq_save(flags);
1241         dev_hold(dev);
1242         list_add_tail(&dev->poll_list, &__get_cpu_var(softnet_data).poll_list);
1243         if (dev->quota < 0)
1244                 dev->quota += dev->weight;
1245         else
1246                 dev->quota = dev->weight;
1247         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
1248         local_irq_restore(flags);
1249 }
1250 EXPORT_SYMBOL(__netif_rx_schedule);
1251
1252 void dev_kfree_skb_any(struct sk_buff *skb)
1253 {
1254         if (in_irq() || irqs_disabled())
1255                 dev_kfree_skb_irq(skb);
1256         else
1257                 dev_kfree_skb(skb);
1258 }
1259 EXPORT_SYMBOL(dev_kfree_skb_any);
1260
1261
1262 /* Hot-plugging. */
1263 void netif_device_detach(struct net_device *dev)
1264 {
1265         if (test_and_clear_bit(__LINK_STATE_PRESENT, &dev->state) &&
1266             netif_running(dev)) {
1267                 netif_stop_queue(dev);
1268         }
1269 }
1270 EXPORT_SYMBOL(netif_device_detach);
1271
1272 void netif_device_attach(struct net_device *dev)
1273 {
1274         if (!test_and_set_bit(__LINK_STATE_PRESENT, &dev->state) &&
1275             netif_running(dev)) {
1276                 netif_wake_queue(dev);
1277                 __netdev_watchdog_up(dev);
1278         }
1279 }
1280 EXPORT_SYMBOL(netif_device_attach);
1281
1282
1283 /*
1284  * Invalidate hardware checksum when packet is to be mangled, and
1285  * complete checksum manually on outgoing path.
1286  */
1287 int skb_checksum_help(struct sk_buff *skb)
1288 {
1289         __wsum csum;
1290         int ret = 0, offset;
1291
1292         if (skb->ip_summed == CHECKSUM_COMPLETE)
1293                 goto out_set_summed;
1294
1295         if (unlikely(skb_shinfo(skb)->gso_size)) {
1296                 /* Let GSO fix up the checksum. */
1297                 goto out_set_summed;
1298         }
1299
1300         if (skb_cloned(skb)) {
1301                 ret = pskb_expand_head(skb, 0, 0, GFP_ATOMIC);
1302                 if (ret)
1303                         goto out;
1304         }
1305
1306         offset = skb->csum_start - skb_headroom(skb);
1307         BUG_ON(offset > (int)skb->len);
1308         csum = skb_checksum(skb, offset, skb->len-offset, 0);
1309
1310         offset = skb_headlen(skb) - offset;
1311         BUG_ON(offset <= 0);
1312         BUG_ON(skb->csum_offset + 2 > offset);
1313
1314         *(__sum16 *)(skb->head + skb->csum_start + skb->csum_offset) =
1315                 csum_fold(csum);
1316 out_set_summed:
1317         skb->ip_summed = CHECKSUM_NONE;
1318 out:
1319         return ret;
1320 }
1321
1322 /**
1323  *      skb_gso_segment - Perform segmentation on skb.
1324  *      @skb: buffer to segment
1325  *      @features: features for the output path (see dev->features)
1326  *
1327  *      This function segments the given skb and returns a list of segments.
1328  *
1329  *      It may return NULL if the skb requires no segmentation.  This is
1330  *      only possible when GSO is used for verifying header integrity.
1331  */
1332 struct sk_buff *skb_gso_segment(struct sk_buff *skb, int features)
1333 {
1334         struct sk_buff *segs = ERR_PTR(-EPROTONOSUPPORT);
1335         struct packet_type *ptype;
1336         __be16 type = skb->protocol;
1337         int err;
1338
1339         BUG_ON(skb_shinfo(skb)->frag_list);
1340
1341         skb_reset_mac_header(skb);
1342         skb->mac_len = skb->network_header - skb->mac_header;
1343         __skb_pull(skb, skb->mac_len);
1344
1345         if (WARN_ON(skb->ip_summed != CHECKSUM_PARTIAL)) {
1346                 if (skb_header_cloned(skb) &&
1347                     (err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC)))
1348                         return ERR_PTR(err);
1349         }
1350
1351         rcu_read_lock();
1352         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type) & 15], list) {
1353                 if (ptype->type == type && !ptype->dev && ptype->gso_segment) {
1354                         if (unlikely(skb->ip_summed != CHECKSUM_PARTIAL)) {
1355                                 err = ptype->gso_send_check(skb);
1356                                 segs = ERR_PTR(err);
1357                                 if (err || skb_gso_ok(skb, features))
1358                                         break;
1359                                 __skb_push(skb, (skb->data -
1360                                                  skb_network_header(skb)));
1361                         }
1362                         segs = ptype->gso_segment(skb, features);
1363                         break;
1364                 }
1365         }
1366         rcu_read_unlock();
1367
1368         __skb_push(skb, skb->data - skb_mac_header(skb));
1369
1370         return segs;
1371 }
1372
1373 EXPORT_SYMBOL(skb_gso_segment);
1374
1375 /* Take action when hardware reception checksum errors are detected. */
1376 #ifdef CONFIG_BUG
1377 void netdev_rx_csum_fault(struct net_device *dev)
1378 {
1379         if (net_ratelimit()) {
1380                 printk(KERN_ERR "%s: hw csum failure.\n",
1381                         dev ? dev->name : "<unknown>");
1382                 dump_stack();
1383         }
1384 }
1385 EXPORT_SYMBOL(netdev_rx_csum_fault);
1386 #endif
1387
1388 /* Actually, we should eliminate this check as soon as we know, that:
1389  * 1. IOMMU is present and allows to map all the memory.
1390  * 2. No high memory really exists on this machine.
1391  */
1392
1393 static inline int illegal_highdma(struct net_device *dev, struct sk_buff *skb)
1394 {
1395 #ifdef CONFIG_HIGHMEM
1396         int i;
1397
1398         if (dev->features & NETIF_F_HIGHDMA)
1399                 return 0;
1400
1401         for (i = 0; i < skb_shinfo(skb)->nr_frags; i++)
1402                 if (PageHighMem(skb_shinfo(skb)->frags[i].page))
1403                         return 1;
1404
1405 #endif
1406         return 0;
1407 }
1408
1409 struct dev_gso_cb {
1410         void (*destructor)(struct sk_buff *skb);
1411 };
1412
1413 #define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb)
1414
1415 static void dev_gso_skb_destructor(struct sk_buff *skb)
1416 {
1417         struct dev_gso_cb *cb;
1418
1419         do {
1420                 struct sk_buff *nskb = skb->next;
1421
1422                 skb->next = nskb->next;
1423                 nskb->next = NULL;
1424                 kfree_skb(nskb);
1425         } while (skb->next);
1426
1427         cb = DEV_GSO_CB(skb);
1428         if (cb->destructor)
1429                 cb->destructor(skb);
1430 }
1431
1432 /**
1433  *      dev_gso_segment - Perform emulated hardware segmentation on skb.
1434  *      @skb: buffer to segment
1435  *
1436  *      This function segments the given skb and stores the list of segments
1437  *      in skb->next.
1438  */
1439 static int dev_gso_segment(struct sk_buff *skb)
1440 {
1441         struct net_device *dev = skb->dev;
1442         struct sk_buff *segs;
1443         int features = dev->features & ~(illegal_highdma(dev, skb) ?
1444                                          NETIF_F_SG : 0);
1445
1446         segs = skb_gso_segment(skb, features);
1447
1448         /* Verifying header integrity only. */
1449         if (!segs)
1450                 return 0;
1451
1452         if (unlikely(IS_ERR(segs)))
1453                 return PTR_ERR(segs);
1454
1455         skb->next = segs;
1456         DEV_GSO_CB(skb)->destructor = skb->destructor;
1457         skb->destructor = dev_gso_skb_destructor;
1458
1459         return 0;
1460 }
1461
1462 int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev)
1463 {
1464         if (likely(!skb->next)) {
1465                 if (!list_empty(&ptype_all))
1466                         dev_queue_xmit_nit(skb, dev);
1467
1468                 if (netif_needs_gso(dev, skb)) {
1469                         if (unlikely(dev_gso_segment(skb)))
1470                                 goto out_kfree_skb;
1471                         if (skb->next)
1472                                 goto gso;
1473                 }
1474
1475                 return dev->hard_start_xmit(skb, dev);
1476         }
1477
1478 gso:
1479         do {
1480                 struct sk_buff *nskb = skb->next;
1481                 int rc;
1482
1483                 skb->next = nskb->next;
1484                 nskb->next = NULL;
1485                 rc = dev->hard_start_xmit(nskb, dev);
1486                 if (unlikely(rc)) {
1487                         nskb->next = skb->next;
1488                         skb->next = nskb;
1489                         return rc;
1490                 }
1491                 if (unlikely((netif_queue_stopped(dev) ||
1492                              netif_subqueue_stopped(dev, skb->queue_mapping)) &&
1493                              skb->next))
1494                         return NETDEV_TX_BUSY;
1495         } while (skb->next);
1496
1497         skb->destructor = DEV_GSO_CB(skb)->destructor;
1498
1499 out_kfree_skb:
1500         kfree_skb(skb);
1501         return 0;
1502 }
1503
1504 #define HARD_TX_LOCK(dev, cpu) {                        \
1505         if ((dev->features & NETIF_F_LLTX) == 0) {      \
1506                 netif_tx_lock(dev);                     \
1507         }                                               \
1508 }
1509
1510 #define HARD_TX_UNLOCK(dev) {                           \
1511         if ((dev->features & NETIF_F_LLTX) == 0) {      \
1512                 netif_tx_unlock(dev);                   \
1513         }                                               \
1514 }
1515
1516 /**
1517  *      dev_queue_xmit - transmit a buffer
1518  *      @skb: buffer to transmit
1519  *
1520  *      Queue a buffer for transmission to a network device. The caller must
1521  *      have set the device and priority and built the buffer before calling
1522  *      this function. The function can be called from an interrupt.
1523  *
1524  *      A negative errno code is returned on a failure. A success does not
1525  *      guarantee the frame will be transmitted as it may be dropped due
1526  *      to congestion or traffic shaping.
1527  *
1528  * -----------------------------------------------------------------------------------
1529  *      I notice this method can also return errors from the queue disciplines,
1530  *      including NET_XMIT_DROP, which is a positive value.  So, errors can also
1531  *      be positive.
1532  *
1533  *      Regardless of the return value, the skb is consumed, so it is currently
1534  *      difficult to retry a send to this method.  (You can bump the ref count
1535  *      before sending to hold a reference for retry if you are careful.)
1536  *
1537  *      When calling this method, interrupts MUST be enabled.  This is because
1538  *      the BH enable code must have IRQs enabled so that it will not deadlock.
1539  *          --BLG
1540  */
1541
1542 int dev_queue_xmit(struct sk_buff *skb)
1543 {
1544         struct net_device *dev = skb->dev;
1545         struct Qdisc *q;
1546         int rc = -ENOMEM;
1547
1548         /* GSO will handle the following emulations directly. */
1549         if (netif_needs_gso(dev, skb))
1550                 goto gso;
1551
1552         if (skb_shinfo(skb)->frag_list &&
1553             !(dev->features & NETIF_F_FRAGLIST) &&
1554             __skb_linearize(skb))
1555                 goto out_kfree_skb;
1556
1557         /* Fragmented skb is linearized if device does not support SG,
1558          * or if at least one of fragments is in highmem and device
1559          * does not support DMA from it.
1560          */
1561         if (skb_shinfo(skb)->nr_frags &&
1562             (!(dev->features & NETIF_F_SG) || illegal_highdma(dev, skb)) &&
1563             __skb_linearize(skb))
1564                 goto out_kfree_skb;
1565
1566         /* If packet is not checksummed and device does not support
1567          * checksumming for this protocol, complete checksumming here.
1568          */
1569         if (skb->ip_summed == CHECKSUM_PARTIAL) {
1570                 skb_set_transport_header(skb, skb->csum_start -
1571                                               skb_headroom(skb));
1572
1573                 if (!(dev->features & NETIF_F_GEN_CSUM) &&
1574                     !((dev->features & NETIF_F_IP_CSUM) &&
1575                       skb->protocol == htons(ETH_P_IP)) &&
1576                     !((dev->features & NETIF_F_IPV6_CSUM) &&
1577                       skb->protocol == htons(ETH_P_IPV6)))
1578                         if (skb_checksum_help(skb))
1579                                 goto out_kfree_skb;
1580         }
1581
1582 gso:
1583         spin_lock_prefetch(&dev->queue_lock);
1584
1585         /* Disable soft irqs for various locks below. Also
1586          * stops preemption for RCU.
1587          */
1588         rcu_read_lock_bh();
1589
1590         /* Updates of qdisc are serialized by queue_lock.
1591          * The struct Qdisc which is pointed to by qdisc is now a
1592          * rcu structure - it may be accessed without acquiring
1593          * a lock (but the structure may be stale.) The freeing of the
1594          * qdisc will be deferred until it's known that there are no
1595          * more references to it.
1596          *
1597          * If the qdisc has an enqueue function, we still need to
1598          * hold the queue_lock before calling it, since queue_lock
1599          * also serializes access to the device queue.
1600          */
1601
1602         q = rcu_dereference(dev->qdisc);
1603 #ifdef CONFIG_NET_CLS_ACT
1604         skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_EGRESS);
1605 #endif
1606         if (q->enqueue) {
1607                 /* Grab device queue */
1608                 spin_lock(&dev->queue_lock);
1609                 q = dev->qdisc;
1610                 if (q->enqueue) {
1611                         /* reset queue_mapping to zero */
1612                         skb->queue_mapping = 0;
1613                         rc = q->enqueue(skb, q);
1614                         qdisc_run(dev);
1615                         spin_unlock(&dev->queue_lock);
1616
1617                         rc = rc == NET_XMIT_BYPASS ? NET_XMIT_SUCCESS : rc;
1618                         goto out;
1619                 }
1620                 spin_unlock(&dev->queue_lock);
1621         }
1622
1623         /* The device has no queue. Common case for software devices:
1624            loopback, all the sorts of tunnels...
1625
1626            Really, it is unlikely that netif_tx_lock protection is necessary
1627            here.  (f.e. loopback and IP tunnels are clean ignoring statistics
1628            counters.)
1629            However, it is possible, that they rely on protection
1630            made by us here.
1631
1632            Check this and shot the lock. It is not prone from deadlocks.
1633            Either shot noqueue qdisc, it is even simpler 8)
1634          */
1635         if (dev->flags & IFF_UP) {
1636                 int cpu = smp_processor_id(); /* ok because BHs are off */
1637
1638                 if (dev->xmit_lock_owner != cpu) {
1639
1640                         HARD_TX_LOCK(dev, cpu);
1641
1642                         if (!netif_queue_stopped(dev) &&
1643                             !netif_subqueue_stopped(dev, skb->queue_mapping)) {
1644                                 rc = 0;
1645                                 if (!dev_hard_start_xmit(skb, dev)) {
1646                                         HARD_TX_UNLOCK(dev);
1647                                         goto out;
1648                                 }
1649                         }
1650                         HARD_TX_UNLOCK(dev);
1651                         if (net_ratelimit())
1652                                 printk(KERN_CRIT "Virtual device %s asks to "
1653                                        "queue packet!\n", dev->name);
1654                 } else {
1655                         /* Recursion is detected! It is possible,
1656                          * unfortunately */
1657                         if (net_ratelimit())
1658                                 printk(KERN_CRIT "Dead loop on virtual device "
1659                                        "%s, fix it urgently!\n", dev->name);
1660                 }
1661         }
1662
1663         rc = -ENETDOWN;
1664         rcu_read_unlock_bh();
1665
1666 out_kfree_skb:
1667         kfree_skb(skb);
1668         return rc;
1669 out:
1670         rcu_read_unlock_bh();
1671         return rc;
1672 }
1673
1674
1675 /*=======================================================================
1676                         Receiver routines
1677   =======================================================================*/
1678
1679 int netdev_max_backlog __read_mostly = 1000;
1680 int netdev_budget __read_mostly = 300;
1681 int weight_p __read_mostly = 64;            /* old backlog weight */
1682
1683 DEFINE_PER_CPU(struct netif_rx_stats, netdev_rx_stat) = { 0, };
1684
1685
1686 /**
1687  *      netif_rx        -       post buffer to the network code
1688  *      @skb: buffer to post
1689  *
1690  *      This function receives a packet from a device driver and queues it for
1691  *      the upper (protocol) levels to process.  It always succeeds. The buffer
1692  *      may be dropped during processing for congestion control or by the
1693  *      protocol layers.
1694  *
1695  *      return values:
1696  *      NET_RX_SUCCESS  (no congestion)
1697  *      NET_RX_CN_LOW   (low congestion)
1698  *      NET_RX_CN_MOD   (moderate congestion)
1699  *      NET_RX_CN_HIGH  (high congestion)
1700  *      NET_RX_DROP     (packet was dropped)
1701  *
1702  */
1703
1704 int netif_rx(struct sk_buff *skb)
1705 {
1706         struct softnet_data *queue;
1707         unsigned long flags;
1708
1709         /* if netpoll wants it, pretend we never saw it */
1710         if (netpoll_rx(skb))
1711                 return NET_RX_DROP;
1712
1713         if (!skb->tstamp.tv64)
1714                 net_timestamp(skb);
1715
1716         /*
1717          * The code is rearranged so that the path is the most
1718          * short when CPU is congested, but is still operating.
1719          */
1720         local_irq_save(flags);
1721         queue = &__get_cpu_var(softnet_data);
1722
1723         __get_cpu_var(netdev_rx_stat).total++;
1724         if (queue->input_pkt_queue.qlen <= netdev_max_backlog) {
1725                 if (queue->input_pkt_queue.qlen) {
1726 enqueue:
1727                         dev_hold(skb->dev);
1728                         __skb_queue_tail(&queue->input_pkt_queue, skb);
1729                         local_irq_restore(flags);
1730                         return NET_RX_SUCCESS;
1731                 }
1732
1733                 netif_rx_schedule(&queue->backlog_dev);
1734                 goto enqueue;
1735         }
1736
1737         __get_cpu_var(netdev_rx_stat).dropped++;
1738         local_irq_restore(flags);
1739
1740         kfree_skb(skb);
1741         return NET_RX_DROP;
1742 }
1743
1744 int netif_rx_ni(struct sk_buff *skb)
1745 {
1746         int err;
1747
1748         preempt_disable();
1749         err = netif_rx(skb);
1750         if (local_softirq_pending())
1751                 do_softirq();
1752         preempt_enable();
1753
1754         return err;
1755 }
1756
1757 EXPORT_SYMBOL(netif_rx_ni);
1758
1759 static inline struct net_device *skb_bond(struct sk_buff *skb)
1760 {
1761         struct net_device *dev = skb->dev;
1762
1763         if (dev->master) {
1764                 if (skb_bond_should_drop(skb)) {
1765                         kfree_skb(skb);
1766                         return NULL;
1767                 }
1768                 skb->dev = dev->master;
1769         }
1770
1771         return dev;
1772 }
1773
1774 static void net_tx_action(struct softirq_action *h)
1775 {
1776         struct softnet_data *sd = &__get_cpu_var(softnet_data);
1777
1778         if (sd->completion_queue) {
1779                 struct sk_buff *clist;
1780
1781                 local_irq_disable();
1782                 clist = sd->completion_queue;
1783                 sd->completion_queue = NULL;
1784                 local_irq_enable();
1785
1786                 while (clist) {
1787                         struct sk_buff *skb = clist;
1788                         clist = clist->next;
1789
1790                         BUG_TRAP(!atomic_read(&skb->users));
1791                         __kfree_skb(skb);
1792                 }
1793         }
1794
1795         if (sd->output_queue) {
1796                 struct net_device *head;
1797
1798                 local_irq_disable();
1799                 head = sd->output_queue;
1800                 sd->output_queue = NULL;
1801                 local_irq_enable();
1802
1803                 while (head) {
1804                         struct net_device *dev = head;
1805                         head = head->next_sched;
1806
1807                         smp_mb__before_clear_bit();
1808                         clear_bit(__LINK_STATE_SCHED, &dev->state);
1809
1810                         if (spin_trylock(&dev->queue_lock)) {
1811                                 qdisc_run(dev);
1812                                 spin_unlock(&dev->queue_lock);
1813                         } else {
1814                                 netif_schedule(dev);
1815                         }
1816                 }
1817         }
1818 }
1819
1820 static inline int deliver_skb(struct sk_buff *skb,
1821                               struct packet_type *pt_prev,
1822                               struct net_device *orig_dev)
1823 {
1824         atomic_inc(&skb->users);
1825         return pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
1826 }
1827
1828 #if defined(CONFIG_BRIDGE) || defined (CONFIG_BRIDGE_MODULE)
1829 /* These hooks defined here for ATM */
1830 struct net_bridge;
1831 struct net_bridge_fdb_entry *(*br_fdb_get_hook)(struct net_bridge *br,
1832                                                 unsigned char *addr);
1833 void (*br_fdb_put_hook)(struct net_bridge_fdb_entry *ent) __read_mostly;
1834
1835 /*
1836  * If bridge module is loaded call bridging hook.
1837  *  returns NULL if packet was consumed.
1838  */
1839 struct sk_buff *(*br_handle_frame_hook)(struct net_bridge_port *p,
1840                                         struct sk_buff *skb) __read_mostly;
1841 static inline struct sk_buff *handle_bridge(struct sk_buff *skb,
1842                                             struct packet_type **pt_prev, int *ret,
1843                                             struct net_device *orig_dev)
1844 {
1845         struct net_bridge_port *port;
1846
1847         if (skb->pkt_type == PACKET_LOOPBACK ||
1848             (port = rcu_dereference(skb->dev->br_port)) == NULL)
1849                 return skb;
1850
1851         if (*pt_prev) {
1852                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1853                 *pt_prev = NULL;
1854         }
1855
1856         return br_handle_frame_hook(port, skb);
1857 }
1858 #else
1859 #define handle_bridge(skb, pt_prev, ret, orig_dev)      (skb)
1860 #endif
1861
1862 #if defined(CONFIG_MACVLAN) || defined(CONFIG_MACVLAN_MODULE)
1863 struct sk_buff *(*macvlan_handle_frame_hook)(struct sk_buff *skb) __read_mostly;
1864 EXPORT_SYMBOL_GPL(macvlan_handle_frame_hook);
1865
1866 static inline struct sk_buff *handle_macvlan(struct sk_buff *skb,
1867                                              struct packet_type **pt_prev,
1868                                              int *ret,
1869                                              struct net_device *orig_dev)
1870 {
1871         if (skb->dev->macvlan_port == NULL)
1872                 return skb;
1873
1874         if (*pt_prev) {
1875                 *ret = deliver_skb(skb, *pt_prev, orig_dev);
1876                 *pt_prev = NULL;
1877         }
1878         return macvlan_handle_frame_hook(skb);
1879 }
1880 #else
1881 #define handle_macvlan(skb, pt_prev, ret, orig_dev)     (skb)
1882 #endif
1883
1884 #ifdef CONFIG_NET_CLS_ACT
1885 /* TODO: Maybe we should just force sch_ingress to be compiled in
1886  * when CONFIG_NET_CLS_ACT is? otherwise some useless instructions
1887  * a compare and 2 stores extra right now if we dont have it on
1888  * but have CONFIG_NET_CLS_ACT
1889  * NOTE: This doesnt stop any functionality; if you dont have
1890  * the ingress scheduler, you just cant add policies on ingress.
1891  *
1892  */
1893 static int ing_filter(struct sk_buff *skb)
1894 {
1895         struct Qdisc *q;
1896         struct net_device *dev = skb->dev;
1897         int result = TC_ACT_OK;
1898
1899         if (dev->qdisc_ingress) {
1900                 __u32 ttl = (__u32) G_TC_RTTL(skb->tc_verd);
1901                 if (MAX_RED_LOOP < ttl++) {
1902                         printk(KERN_WARNING "Redir loop detected Dropping packet (%d->%d)\n",
1903                                 skb->iif, skb->dev->ifindex);
1904                         return TC_ACT_SHOT;
1905                 }
1906
1907                 skb->tc_verd = SET_TC_RTTL(skb->tc_verd,ttl);
1908
1909                 skb->tc_verd = SET_TC_AT(skb->tc_verd,AT_INGRESS);
1910
1911                 spin_lock(&dev->ingress_lock);
1912                 if ((q = dev->qdisc_ingress) != NULL)
1913                         result = q->enqueue(skb, q);
1914                 spin_unlock(&dev->ingress_lock);
1915
1916         }
1917
1918         return result;
1919 }
1920 #endif
1921
1922 int netif_receive_skb(struct sk_buff *skb)
1923 {
1924         struct packet_type *ptype, *pt_prev;
1925         struct net_device *orig_dev;
1926         int ret = NET_RX_DROP;
1927         __be16 type;
1928
1929         /* if we've gotten here through NAPI, check netpoll */
1930         if (skb->dev->poll && netpoll_rx(skb))
1931                 return NET_RX_DROP;
1932
1933         if (!skb->tstamp.tv64)
1934                 net_timestamp(skb);
1935
1936         if (!skb->iif)
1937                 skb->iif = skb->dev->ifindex;
1938
1939         orig_dev = skb_bond(skb);
1940
1941         if (!orig_dev)
1942                 return NET_RX_DROP;
1943
1944         __get_cpu_var(netdev_rx_stat).total++;
1945
1946         skb_reset_network_header(skb);
1947         skb_reset_transport_header(skb);
1948         skb->mac_len = skb->network_header - skb->mac_header;
1949
1950         pt_prev = NULL;
1951
1952         rcu_read_lock();
1953
1954 #ifdef CONFIG_NET_CLS_ACT
1955         if (skb->tc_verd & TC_NCLS) {
1956                 skb->tc_verd = CLR_TC_NCLS(skb->tc_verd);
1957                 goto ncls;
1958         }
1959 #endif
1960
1961         list_for_each_entry_rcu(ptype, &ptype_all, list) {
1962                 if (!ptype->dev || ptype->dev == skb->dev) {
1963                         if (pt_prev)
1964                                 ret = deliver_skb(skb, pt_prev, orig_dev);
1965                         pt_prev = ptype;
1966                 }
1967         }
1968
1969 #ifdef CONFIG_NET_CLS_ACT
1970         if (pt_prev) {
1971                 ret = deliver_skb(skb, pt_prev, orig_dev);
1972                 pt_prev = NULL; /* noone else should process this after*/
1973         } else {
1974                 skb->tc_verd = SET_TC_OK2MUNGE(skb->tc_verd);
1975         }
1976
1977         ret = ing_filter(skb);
1978
1979         if (ret == TC_ACT_SHOT || (ret == TC_ACT_STOLEN)) {
1980                 kfree_skb(skb);
1981                 goto out;
1982         }
1983
1984         skb->tc_verd = 0;
1985 ncls:
1986 #endif
1987
1988         skb = handle_bridge(skb, &pt_prev, &ret, orig_dev);
1989         if (!skb)
1990                 goto out;
1991         skb = handle_macvlan(skb, &pt_prev, &ret, orig_dev);
1992         if (!skb)
1993                 goto out;
1994
1995         type = skb->protocol;
1996         list_for_each_entry_rcu(ptype, &ptype_base[ntohs(type)&15], list) {
1997                 if (ptype->type == type &&
1998                     (!ptype->dev || ptype->dev == skb->dev)) {
1999                         if (pt_prev)
2000                                 ret = deliver_skb(skb, pt_prev, orig_dev);
2001                         pt_prev = ptype;
2002                 }
2003         }
2004
2005         if (pt_prev) {
2006                 ret = pt_prev->func(skb, skb->dev, pt_prev, orig_dev);
2007         } else {
2008                 kfree_skb(skb);
2009                 /* Jamal, now you will not able to escape explaining
2010                  * me how you were going to use this. :-)
2011                  */
2012                 ret = NET_RX_DROP;
2013         }
2014
2015 out:
2016         rcu_read_unlock();
2017         return ret;
2018 }
2019
2020 static int process_backlog(struct net_device *backlog_dev, int *budget)
2021 {
2022         int work = 0;
2023         int quota = min(backlog_dev->quota, *budget);
2024         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2025         unsigned long start_time = jiffies;
2026
2027         backlog_dev->weight = weight_p;
2028         for (;;) {
2029                 struct sk_buff *skb;
2030                 struct net_device *dev;
2031
2032                 local_irq_disable();
2033                 skb = __skb_dequeue(&queue->input_pkt_queue);
2034                 if (!skb)
2035                         goto job_done;
2036                 local_irq_enable();
2037
2038                 dev = skb->dev;
2039
2040                 netif_receive_skb(skb);
2041
2042                 dev_put(dev);
2043
2044                 work++;
2045
2046                 if (work >= quota || jiffies - start_time > 1)
2047                         break;
2048
2049         }
2050
2051         backlog_dev->quota -= work;
2052         *budget -= work;
2053         return -1;
2054
2055 job_done:
2056         backlog_dev->quota -= work;
2057         *budget -= work;
2058
2059         list_del(&backlog_dev->poll_list);
2060         smp_mb__before_clear_bit();
2061         netif_poll_enable(backlog_dev);
2062
2063         local_irq_enable();
2064         return 0;
2065 }
2066
2067 static void net_rx_action(struct softirq_action *h)
2068 {
2069         struct softnet_data *queue = &__get_cpu_var(softnet_data);
2070         unsigned long start_time = jiffies;
2071         int budget = netdev_budget;
2072         void *have;
2073
2074         local_irq_disable();
2075
2076         while (!list_empty(&queue->poll_list)) {
2077                 struct net_device *dev;
2078
2079                 if (budget <= 0 || jiffies - start_time > 1)
2080                         goto softnet_break;
2081
2082                 local_irq_enable();
2083
2084                 dev = list_entry(queue->poll_list.next,
2085                                  struct net_device, poll_list);
2086                 have = netpoll_poll_lock(dev);
2087
2088                 if (dev->quota <= 0 || dev->poll(dev, &budget)) {
2089                         netpoll_poll_unlock(have);
2090                         local_irq_disable();
2091                         list_move_tail(&dev->poll_list, &queue->poll_list);
2092                         if (dev->quota < 0)
2093                                 dev->quota += dev->weight;
2094                         else
2095                                 dev->quota = dev->weight;
2096                 } else {
2097                         netpoll_poll_unlock(have);
2098                         dev_put(dev);
2099                         local_irq_disable();
2100                 }
2101         }
2102 out:
2103         local_irq_enable();
2104 #ifdef CONFIG_NET_DMA
2105         /*
2106          * There may not be any more sk_buffs coming right now, so push
2107          * any pending DMA copies to hardware
2108          */
2109         if (!cpus_empty(net_dma.channel_mask)) {
2110                 int chan_idx;
2111                 for_each_cpu_mask(chan_idx, net_dma.channel_mask) {
2112                         struct dma_chan *chan = net_dma.channels[chan_idx];
2113                         if (chan)
2114                                 dma_async_memcpy_issue_pending(chan);
2115                 }
2116         }
2117 #endif
2118         return;
2119
2120 softnet_break:
2121         __get_cpu_var(netdev_rx_stat).time_squeeze++;
2122         __raise_softirq_irqoff(NET_RX_SOFTIRQ);
2123         goto out;
2124 }
2125
2126 static gifconf_func_t * gifconf_list [NPROTO];
2127
2128 /**
2129  *      register_gifconf        -       register a SIOCGIF handler
2130  *      @family: Address family
2131  *      @gifconf: Function handler
2132  *
2133  *      Register protocol dependent address dumping routines. The handler
2134  *      that is passed must not be freed or reused until it has been replaced
2135  *      by another handler.
2136  */
2137 int register_gifconf(unsigned int family, gifconf_func_t * gifconf)
2138 {
2139         if (family >= NPROTO)
2140                 return -EINVAL;
2141         gifconf_list[family] = gifconf;
2142         return 0;
2143 }
2144
2145
2146 /*
2147  *      Map an interface index to its name (SIOCGIFNAME)
2148  */
2149
2150 /*
2151  *      We need this ioctl for efficient implementation of the
2152  *      if_indextoname() function required by the IPv6 API.  Without
2153  *      it, we would have to search all the interfaces to find a
2154  *      match.  --pb
2155  */
2156
2157 static int dev_ifname(struct ifreq __user *arg)
2158 {
2159         struct net_device *dev;
2160         struct ifreq ifr;
2161
2162         /*
2163          *      Fetch the caller's info block.
2164          */
2165
2166         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
2167                 return -EFAULT;
2168
2169         read_lock(&dev_base_lock);
2170         dev = __dev_get_by_index(ifr.ifr_ifindex);
2171         if (!dev) {
2172                 read_unlock(&dev_base_lock);
2173                 return -ENODEV;
2174         }
2175
2176         strcpy(ifr.ifr_name, dev->name);
2177         read_unlock(&dev_base_lock);
2178
2179         if (copy_to_user(arg, &ifr, sizeof(struct ifreq)))
2180                 return -EFAULT;
2181         return 0;
2182 }
2183
2184 /*
2185  *      Perform a SIOCGIFCONF call. This structure will change
2186  *      size eventually, and there is nothing I can do about it.
2187  *      Thus we will need a 'compatibility mode'.
2188  */
2189
2190 static int dev_ifconf(char __user *arg)
2191 {
2192         struct ifconf ifc;
2193         struct net_device *dev;
2194         char __user *pos;
2195         int len;
2196         int total;
2197         int i;
2198
2199         /*
2200          *      Fetch the caller's info block.
2201          */
2202
2203         if (copy_from_user(&ifc, arg, sizeof(struct ifconf)))
2204                 return -EFAULT;
2205
2206         pos = ifc.ifc_buf;
2207         len = ifc.ifc_len;
2208
2209         /*
2210          *      Loop over the interfaces, and write an info block for each.
2211          */
2212
2213         total = 0;
2214         for_each_netdev(dev) {
2215                 for (i = 0; i < NPROTO; i++) {
2216                         if (gifconf_list[i]) {
2217                                 int done;
2218                                 if (!pos)
2219                                         done = gifconf_list[i](dev, NULL, 0);
2220                                 else
2221                                         done = gifconf_list[i](dev, pos + total,
2222                                                                len - total);
2223                                 if (done < 0)
2224                                         return -EFAULT;
2225                                 total += done;
2226                         }
2227                 }
2228         }
2229
2230         /*
2231          *      All done.  Write the updated control block back to the caller.
2232          */
2233         ifc.ifc_len = total;
2234
2235         /*
2236          *      Both BSD and Solaris return 0 here, so we do too.
2237          */
2238         return copy_to_user(arg, &ifc, sizeof(struct ifconf)) ? -EFAULT : 0;
2239 }
2240
2241 #ifdef CONFIG_PROC_FS
2242 /*
2243  *      This is invoked by the /proc filesystem handler to display a device
2244  *      in detail.
2245  */
2246 void *dev_seq_start(struct seq_file *seq, loff_t *pos)
2247 {
2248         loff_t off;
2249         struct net_device *dev;
2250
2251         read_lock(&dev_base_lock);
2252         if (!*pos)
2253                 return SEQ_START_TOKEN;
2254
2255         off = 1;
2256         for_each_netdev(dev)
2257                 if (off++ == *pos)
2258                         return dev;
2259
2260         return NULL;
2261 }
2262
2263 void *dev_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2264 {
2265         ++*pos;
2266         return v == SEQ_START_TOKEN ?
2267                 first_net_device() : next_net_device((struct net_device *)v);
2268 }
2269
2270 void dev_seq_stop(struct seq_file *seq, void *v)
2271 {
2272         read_unlock(&dev_base_lock);
2273 }
2274
2275 static void dev_seq_printf_stats(struct seq_file *seq, struct net_device *dev)
2276 {
2277         struct net_device_stats *stats = dev->get_stats(dev);
2278
2279         seq_printf(seq, "%6s:%8lu %7lu %4lu %4lu %4lu %5lu %10lu %9lu "
2280                    "%8lu %7lu %4lu %4lu %4lu %5lu %7lu %10lu\n",
2281                    dev->name, stats->rx_bytes, stats->rx_packets,
2282                    stats->rx_errors,
2283                    stats->rx_dropped + stats->rx_missed_errors,
2284                    stats->rx_fifo_errors,
2285                    stats->rx_length_errors + stats->rx_over_errors +
2286                     stats->rx_crc_errors + stats->rx_frame_errors,
2287                    stats->rx_compressed, stats->multicast,
2288                    stats->tx_bytes, stats->tx_packets,
2289                    stats->tx_errors, stats->tx_dropped,
2290                    stats->tx_fifo_errors, stats->collisions,
2291                    stats->tx_carrier_errors +
2292                     stats->tx_aborted_errors +
2293                     stats->tx_window_errors +
2294                     stats->tx_heartbeat_errors,
2295                    stats->tx_compressed);
2296 }
2297
2298 /*
2299  *      Called from the PROCfs module. This now uses the new arbitrary sized
2300  *      /proc/net interface to create /proc/net/dev
2301  */
2302 static int dev_seq_show(struct seq_file *seq, void *v)
2303 {
2304         if (v == SEQ_START_TOKEN)
2305                 seq_puts(seq, "Inter-|   Receive                            "
2306                               "                    |  Transmit\n"
2307                               " face |bytes    packets errs drop fifo frame "
2308                               "compressed multicast|bytes    packets errs "
2309                               "drop fifo colls carrier compressed\n");
2310         else
2311                 dev_seq_printf_stats(seq, v);
2312         return 0;
2313 }
2314
2315 static struct netif_rx_stats *softnet_get_online(loff_t *pos)
2316 {
2317         struct netif_rx_stats *rc = NULL;
2318
2319         while (*pos < NR_CPUS)
2320                 if (cpu_online(*pos)) {
2321                         rc = &per_cpu(netdev_rx_stat, *pos);
2322                         break;
2323                 } else
2324                         ++*pos;
2325         return rc;
2326 }
2327
2328 static void *softnet_seq_start(struct seq_file *seq, loff_t *pos)
2329 {
2330         return softnet_get_online(pos);
2331 }
2332
2333 static void *softnet_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2334 {
2335         ++*pos;
2336         return softnet_get_online(pos);
2337 }
2338
2339 static void softnet_seq_stop(struct seq_file *seq, void *v)
2340 {
2341 }
2342
2343 static int softnet_seq_show(struct seq_file *seq, void *v)
2344 {
2345         struct netif_rx_stats *s = v;
2346
2347         seq_printf(seq, "%08x %08x %08x %08x %08x %08x %08x %08x %08x\n",
2348                    s->total, s->dropped, s->time_squeeze, 0,
2349                    0, 0, 0, 0, /* was fastroute */
2350                    s->cpu_collision );
2351         return 0;
2352 }
2353
2354 static const struct seq_operations dev_seq_ops = {
2355         .start = dev_seq_start,
2356         .next  = dev_seq_next,
2357         .stop  = dev_seq_stop,
2358         .show  = dev_seq_show,
2359 };
2360
2361 static int dev_seq_open(struct inode *inode, struct file *file)
2362 {
2363         return seq_open(file, &dev_seq_ops);
2364 }
2365
2366 static const struct file_operations dev_seq_fops = {
2367         .owner   = THIS_MODULE,
2368         .open    = dev_seq_open,
2369         .read    = seq_read,
2370         .llseek  = seq_lseek,
2371         .release = seq_release,
2372 };
2373
2374 static const struct seq_operations softnet_seq_ops = {
2375         .start = softnet_seq_start,
2376         .next  = softnet_seq_next,
2377         .stop  = softnet_seq_stop,
2378         .show  = softnet_seq_show,
2379 };
2380
2381 static int softnet_seq_open(struct inode *inode, struct file *file)
2382 {
2383         return seq_open(file, &softnet_seq_ops);
2384 }
2385
2386 static const struct file_operations softnet_seq_fops = {
2387         .owner   = THIS_MODULE,
2388         .open    = softnet_seq_open,
2389         .read    = seq_read,
2390         .llseek  = seq_lseek,
2391         .release = seq_release,
2392 };
2393
2394 static void *ptype_get_idx(loff_t pos)
2395 {
2396         struct packet_type *pt = NULL;
2397         loff_t i = 0;
2398         int t;
2399
2400         list_for_each_entry_rcu(pt, &ptype_all, list) {
2401                 if (i == pos)
2402                         return pt;
2403                 ++i;
2404         }
2405
2406         for (t = 0; t < 16; t++) {
2407                 list_for_each_entry_rcu(pt, &ptype_base[t], list) {
2408                         if (i == pos)
2409                                 return pt;
2410                         ++i;
2411                 }
2412         }
2413         return NULL;
2414 }
2415
2416 static void *ptype_seq_start(struct seq_file *seq, loff_t *pos)
2417 {
2418         rcu_read_lock();
2419         return *pos ? ptype_get_idx(*pos - 1) : SEQ_START_TOKEN;
2420 }
2421
2422 static void *ptype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
2423 {
2424         struct packet_type *pt;
2425         struct list_head *nxt;
2426         int hash;
2427
2428         ++*pos;
2429         if (v == SEQ_START_TOKEN)
2430                 return ptype_get_idx(0);
2431
2432         pt = v;
2433         nxt = pt->list.next;
2434         if (pt->type == htons(ETH_P_ALL)) {
2435                 if (nxt != &ptype_all)
2436                         goto found;
2437                 hash = 0;
2438                 nxt = ptype_base[0].next;
2439         } else
2440                 hash = ntohs(pt->type) & 15;
2441
2442         while (nxt == &ptype_base[hash]) {
2443                 if (++hash >= 16)
2444                         return NULL;
2445                 nxt = ptype_base[hash].next;
2446         }
2447 found:
2448         return list_entry(nxt, struct packet_type, list);
2449 }
2450
2451 static void ptype_seq_stop(struct seq_file *seq, void *v)
2452 {
2453         rcu_read_unlock();
2454 }
2455
2456 static void ptype_seq_decode(struct seq_file *seq, void *sym)
2457 {
2458 #ifdef CONFIG_KALLSYMS
2459         unsigned long offset = 0, symsize;
2460         const char *symname;
2461         char *modname;
2462         char namebuf[128];
2463
2464         symname = kallsyms_lookup((unsigned long)sym, &symsize, &offset,
2465                                   &modname, namebuf);
2466
2467         if (symname) {
2468                 char *delim = ":";
2469
2470                 if (!modname)
2471                         modname = delim = "";
2472                 seq_printf(seq, "%s%s%s%s+0x%lx", delim, modname, delim,
2473                            symname, offset);
2474                 return;
2475         }
2476 #endif
2477
2478         seq_printf(seq, "[%p]", sym);
2479 }
2480
2481 static int ptype_seq_show(struct seq_file *seq, void *v)
2482 {
2483         struct packet_type *pt = v;
2484
2485         if (v == SEQ_START_TOKEN)
2486                 seq_puts(seq, "Type Device      Function\n");
2487         else {
2488                 if (pt->type == htons(ETH_P_ALL))
2489                         seq_puts(seq, "ALL ");
2490                 else
2491                         seq_printf(seq, "%04x", ntohs(pt->type));
2492
2493                 seq_printf(seq, " %-8s ",
2494                            pt->dev ? pt->dev->name : "");
2495                 ptype_seq_decode(seq,  pt->func);
2496                 seq_putc(seq, '\n');
2497         }
2498
2499         return 0;
2500 }
2501
2502 static const struct seq_operations ptype_seq_ops = {
2503         .start = ptype_seq_start,
2504         .next  = ptype_seq_next,
2505         .stop  = ptype_seq_stop,
2506         .show  = ptype_seq_show,
2507 };
2508
2509 static int ptype_seq_open(struct inode *inode, struct file *file)
2510 {
2511         return seq_open(file, &ptype_seq_ops);
2512 }
2513
2514 static const struct file_operations ptype_seq_fops = {
2515         .owner   = THIS_MODULE,
2516         .open    = ptype_seq_open,
2517         .read    = seq_read,
2518         .llseek  = seq_lseek,
2519         .release = seq_release,
2520 };
2521
2522
2523 static int __init dev_proc_init(void)
2524 {
2525         int rc = -ENOMEM;
2526
2527         if (!proc_net_fops_create("dev", S_IRUGO, &dev_seq_fops))
2528                 goto out;
2529         if (!proc_net_fops_create("softnet_stat", S_IRUGO, &softnet_seq_fops))
2530                 goto out_dev;
2531         if (!proc_net_fops_create("ptype", S_IRUGO, &ptype_seq_fops))
2532                 goto out_dev2;
2533
2534         if (wext_proc_init())
2535                 goto out_softnet;
2536         rc = 0;
2537 out:
2538         return rc;
2539 out_softnet:
2540         proc_net_remove("ptype");
2541 out_dev2:
2542         proc_net_remove("softnet_stat");
2543 out_dev:
2544         proc_net_remove("dev");
2545         goto out;
2546 }
2547 #else
2548 #define dev_proc_init() 0
2549 #endif  /* CONFIG_PROC_FS */
2550
2551
2552 /**
2553  *      netdev_set_master       -       set up master/slave pair
2554  *      @slave: slave device
2555  *      @master: new master device
2556  *
2557  *      Changes the master device of the slave. Pass %NULL to break the
2558  *      bonding. The caller must hold the RTNL semaphore. On a failure
2559  *      a negative errno code is returned. On success the reference counts
2560  *      are adjusted, %RTM_NEWLINK is sent to the routing socket and the
2561  *      function returns zero.
2562  */
2563 int netdev_set_master(struct net_device *slave, struct net_device *master)
2564 {
2565         struct net_device *old = slave->master;
2566
2567         ASSERT_RTNL();
2568
2569         if (master) {
2570                 if (old)
2571                         return -EBUSY;
2572                 dev_hold(master);
2573         }
2574
2575         slave->master = master;
2576
2577         synchronize_net();
2578
2579         if (old)
2580                 dev_put(old);
2581
2582         if (master)
2583                 slave->flags |= IFF_SLAVE;
2584         else
2585                 slave->flags &= ~IFF_SLAVE;
2586
2587         rtmsg_ifinfo(RTM_NEWLINK, slave, IFF_SLAVE);
2588         return 0;
2589 }
2590
2591 static void __dev_set_promiscuity(struct net_device *dev, int inc)
2592 {
2593         unsigned short old_flags = dev->flags;
2594
2595         ASSERT_RTNL();
2596
2597         if ((dev->promiscuity += inc) == 0)
2598                 dev->flags &= ~IFF_PROMISC;
2599         else
2600                 dev->flags |= IFF_PROMISC;
2601         if (dev->flags != old_flags) {
2602                 printk(KERN_INFO "device %s %s promiscuous mode\n",
2603                        dev->name, (dev->flags & IFF_PROMISC) ? "entered" :
2604                                                                "left");
2605                 audit_log(current->audit_context, GFP_ATOMIC,
2606                         AUDIT_ANOM_PROMISCUOUS,
2607                         "dev=%s prom=%d old_prom=%d auid=%u",
2608                         dev->name, (dev->flags & IFF_PROMISC),
2609                         (old_flags & IFF_PROMISC),
2610                         audit_get_loginuid(current->audit_context));
2611
2612                 if (dev->change_rx_flags)
2613                         dev->change_rx_flags(dev, IFF_PROMISC);
2614         }
2615 }
2616
2617 /**
2618  *      dev_set_promiscuity     - update promiscuity count on a device
2619  *      @dev: device
2620  *      @inc: modifier
2621  *
2622  *      Add or remove promiscuity from a device. While the count in the device
2623  *      remains above zero the interface remains promiscuous. Once it hits zero
2624  *      the device reverts back to normal filtering operation. A negative inc
2625  *      value is used to drop promiscuity on the device.
2626  */
2627 void dev_set_promiscuity(struct net_device *dev, int inc)
2628 {
2629         unsigned short old_flags = dev->flags;
2630
2631         __dev_set_promiscuity(dev, inc);
2632         if (dev->flags != old_flags)
2633                 dev_set_rx_mode(dev);
2634 }
2635
2636 /**
2637  *      dev_set_allmulti        - update allmulti count on a device
2638  *      @dev: device
2639  *      @inc: modifier
2640  *
2641  *      Add or remove reception of all multicast frames to a device. While the
2642  *      count in the device remains above zero the interface remains listening
2643  *      to all interfaces. Once it hits zero the device reverts back to normal
2644  *      filtering operation. A negative @inc value is used to drop the counter
2645  *      when releasing a resource needing all multicasts.
2646  */
2647
2648 void dev_set_allmulti(struct net_device *dev, int inc)
2649 {
2650         unsigned short old_flags = dev->flags;
2651
2652         ASSERT_RTNL();
2653
2654         dev->flags |= IFF_ALLMULTI;
2655         if ((dev->allmulti += inc) == 0)
2656                 dev->flags &= ~IFF_ALLMULTI;
2657         if (dev->flags ^ old_flags) {
2658                 if (dev->change_rx_flags)
2659                         dev->change_rx_flags(dev, IFF_ALLMULTI);
2660                 dev_set_rx_mode(dev);
2661         }
2662 }
2663
2664 /*
2665  *      Upload unicast and multicast address lists to device and
2666  *      configure RX filtering. When the device doesn't support unicast
2667  *      filtering it is put in promiscous mode while unicast addresses
2668  *      are present.
2669  */
2670 void __dev_set_rx_mode(struct net_device *dev)
2671 {
2672         /* dev_open will call this function so the list will stay sane. */
2673         if (!(dev->flags&IFF_UP))
2674                 return;
2675
2676         if (!netif_device_present(dev))
2677                 return;
2678
2679         if (dev->set_rx_mode)
2680                 dev->set_rx_mode(dev);
2681         else {
2682                 /* Unicast addresses changes may only happen under the rtnl,
2683                  * therefore calling __dev_set_promiscuity here is safe.
2684                  */
2685                 if (dev->uc_count > 0 && !dev->uc_promisc) {
2686                         __dev_set_promiscuity(dev, 1);
2687                         dev->uc_promisc = 1;
2688                 } else if (dev->uc_count == 0 && dev->uc_promisc) {
2689                         __dev_set_promiscuity(dev, -1);
2690                         dev->uc_promisc = 0;
2691                 }
2692
2693                 if (dev->set_multicast_list)
2694                         dev->set_multicast_list(dev);
2695         }
2696 }
2697
2698 void dev_set_rx_mode(struct net_device *dev)
2699 {
2700         netif_tx_lock_bh(dev);
2701         __dev_set_rx_mode(dev);
2702         netif_tx_unlock_bh(dev);
2703 }
2704
2705 int __dev_addr_delete(struct dev_addr_list **list, int *count,
2706                       void *addr, int alen, int glbl)
2707 {
2708         struct dev_addr_list *da;
2709
2710         for (; (da = *list) != NULL; list = &da->next) {
2711                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2712                     alen == da->da_addrlen) {
2713                         if (glbl) {
2714                                 int old_glbl = da->da_gusers;
2715                                 da->da_gusers = 0;
2716                                 if (old_glbl == 0)
2717                                         break;
2718                         }
2719                         if (--da->da_users)
2720                                 return 0;
2721
2722                         *list = da->next;
2723                         kfree(da);
2724                         (*count)--;
2725                         return 0;
2726                 }
2727         }
2728         return -ENOENT;
2729 }
2730
2731 int __dev_addr_add(struct dev_addr_list **list, int *count,
2732                    void *addr, int alen, int glbl)
2733 {
2734         struct dev_addr_list *da;
2735
2736         for (da = *list; da != NULL; da = da->next) {
2737                 if (memcmp(da->da_addr, addr, da->da_addrlen) == 0 &&
2738                     da->da_addrlen == alen) {
2739                         if (glbl) {
2740                                 int old_glbl = da->da_gusers;
2741                                 da->da_gusers = 1;
2742                                 if (old_glbl)
2743                                         return 0;
2744                         }
2745                         da->da_users++;
2746                         return 0;
2747                 }
2748         }
2749
2750         da = kmalloc(sizeof(*da), GFP_ATOMIC);
2751         if (da == NULL)
2752                 return -ENOMEM;
2753         memcpy(da->da_addr, addr, alen);
2754         da->da_addrlen = alen;
2755         da->da_users = 1;
2756         da->da_gusers = glbl ? 1 : 0;
2757         da->next = *list;
2758         *list = da;
2759         (*count)++;
2760         return 0;
2761 }
2762
2763 /**
2764  *      dev_unicast_delete      - Release secondary unicast address.
2765  *      @dev: device
2766  *      @addr: address to delete
2767  *      @alen: length of @addr
2768  *
2769  *      Release reference to a secondary unicast address and remove it
2770  *      from the device if the reference count drops to zero.
2771  *
2772  *      The caller must hold the rtnl_mutex.
2773  */
2774 int dev_unicast_delete(struct net_device *dev, void *addr, int alen)
2775 {
2776         int err;
2777
2778         ASSERT_RTNL();
2779
2780         netif_tx_lock_bh(dev);
2781         err = __dev_addr_delete(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2782         if (!err)
2783                 __dev_set_rx_mode(dev);
2784         netif_tx_unlock_bh(dev);
2785         return err;
2786 }
2787 EXPORT_SYMBOL(dev_unicast_delete);
2788
2789 /**
2790  *      dev_unicast_add         - add a secondary unicast address
2791  *      @dev: device
2792  *      @addr: address to delete
2793  *      @alen: length of @addr
2794  *
2795  *      Add a secondary unicast address to the device or increase
2796  *      the reference count if it already exists.
2797  *
2798  *      The caller must hold the rtnl_mutex.
2799  */
2800 int dev_unicast_add(struct net_device *dev, void *addr, int alen)
2801 {
2802         int err;
2803
2804         ASSERT_RTNL();
2805
2806         netif_tx_lock_bh(dev);
2807         err = __dev_addr_add(&dev->uc_list, &dev->uc_count, addr, alen, 0);
2808         if (!err)
2809                 __dev_set_rx_mode(dev);
2810         netif_tx_unlock_bh(dev);
2811         return err;
2812 }
2813 EXPORT_SYMBOL(dev_unicast_add);
2814
2815 static void __dev_addr_discard(struct dev_addr_list **list)
2816 {
2817         struct dev_addr_list *tmp;
2818
2819         while (*list != NULL) {
2820                 tmp = *list;
2821                 *list = tmp->next;
2822                 if (tmp->da_users > tmp->da_gusers)
2823                         printk("__dev_addr_discard: address leakage! "
2824                                "da_users=%d\n", tmp->da_users);
2825                 kfree(tmp);
2826         }
2827 }
2828
2829 static void dev_addr_discard(struct net_device *dev)
2830 {
2831         netif_tx_lock_bh(dev);
2832
2833         __dev_addr_discard(&dev->uc_list);
2834         dev->uc_count = 0;
2835
2836         __dev_addr_discard(&dev->mc_list);
2837         dev->mc_count = 0;
2838
2839         netif_tx_unlock_bh(dev);
2840 }
2841
2842 unsigned dev_get_flags(const struct net_device *dev)
2843 {
2844         unsigned flags;
2845
2846         flags = (dev->flags & ~(IFF_PROMISC |
2847                                 IFF_ALLMULTI |
2848                                 IFF_RUNNING |
2849                                 IFF_LOWER_UP |
2850                                 IFF_DORMANT)) |
2851                 (dev->gflags & (IFF_PROMISC |
2852                                 IFF_ALLMULTI));
2853
2854         if (netif_running(dev)) {
2855                 if (netif_oper_up(dev))
2856                         flags |= IFF_RUNNING;
2857                 if (netif_carrier_ok(dev))
2858                         flags |= IFF_LOWER_UP;
2859                 if (netif_dormant(dev))
2860                         flags |= IFF_DORMANT;
2861         }
2862
2863         return flags;
2864 }
2865
2866 int dev_change_flags(struct net_device *dev, unsigned flags)
2867 {
2868         int ret, changes;
2869         int old_flags = dev->flags;
2870
2871         ASSERT_RTNL();
2872
2873         /*
2874          *      Set the flags on our device.
2875          */
2876
2877         dev->flags = (flags & (IFF_DEBUG | IFF_NOTRAILERS | IFF_NOARP |
2878                                IFF_DYNAMIC | IFF_MULTICAST | IFF_PORTSEL |
2879                                IFF_AUTOMEDIA)) |
2880                      (dev->flags & (IFF_UP | IFF_VOLATILE | IFF_PROMISC |
2881                                     IFF_ALLMULTI));
2882
2883         /*
2884          *      Load in the correct multicast list now the flags have changed.
2885          */
2886
2887         if (dev->change_rx_flags && (dev->flags ^ flags) & IFF_MULTICAST)
2888                 dev->change_rx_flags(dev, IFF_MULTICAST);
2889
2890         dev_set_rx_mode(dev);
2891
2892         /*
2893          *      Have we downed the interface. We handle IFF_UP ourselves
2894          *      according to user attempts to set it, rather than blindly
2895          *      setting it.
2896          */
2897
2898         ret = 0;
2899         if ((old_flags ^ flags) & IFF_UP) {     /* Bit is different  ? */
2900                 ret = ((old_flags & IFF_UP) ? dev_close : dev_open)(dev);
2901
2902                 if (!ret)
2903                         dev_set_rx_mode(dev);
2904         }
2905
2906         if (dev->flags & IFF_UP &&
2907             ((old_flags ^ dev->flags) &~ (IFF_UP | IFF_PROMISC | IFF_ALLMULTI |
2908                                           IFF_VOLATILE)))
2909                 raw_notifier_call_chain(&netdev_chain,
2910                                 NETDEV_CHANGE, dev);
2911
2912         if ((flags ^ dev->gflags) & IFF_PROMISC) {
2913                 int inc = (flags & IFF_PROMISC) ? +1 : -1;
2914                 dev->gflags ^= IFF_PROMISC;
2915                 dev_set_promiscuity(dev, inc);
2916         }
2917
2918         /* NOTE: order of synchronization of IFF_PROMISC and IFF_ALLMULTI
2919            is important. Some (broken) drivers set IFF_PROMISC, when
2920            IFF_ALLMULTI is requested not asking us and not reporting.
2921          */
2922         if ((flags ^ dev->gflags) & IFF_ALLMULTI) {
2923                 int inc = (flags & IFF_ALLMULTI) ? +1 : -1;
2924                 dev->gflags ^= IFF_ALLMULTI;
2925                 dev_set_allmulti(dev, inc);
2926         }
2927
2928         /* Exclude state transition flags, already notified */
2929         changes = (old_flags ^ dev->flags) & ~(IFF_UP | IFF_RUNNING);
2930         if (changes)
2931                 rtmsg_ifinfo(RTM_NEWLINK, dev, changes);
2932
2933         return ret;
2934 }
2935
2936 int dev_set_mtu(struct net_device *dev, int new_mtu)
2937 {
2938         int err;
2939
2940         if (new_mtu == dev->mtu)
2941                 return 0;
2942
2943         /*      MTU must be positive.    */
2944         if (new_mtu < 0)
2945                 return -EINVAL;
2946
2947         if (!netif_device_present(dev))
2948                 return -ENODEV;
2949
2950         err = 0;
2951         if (dev->change_mtu)
2952                 err = dev->change_mtu(dev, new_mtu);
2953         else
2954                 dev->mtu = new_mtu;
2955         if (!err && dev->flags & IFF_UP)
2956                 raw_notifier_call_chain(&netdev_chain,
2957                                 NETDEV_CHANGEMTU, dev);
2958         return err;
2959 }
2960
2961 int dev_set_mac_address(struct net_device *dev, struct sockaddr *sa)
2962 {
2963         int err;
2964
2965         if (!dev->set_mac_address)
2966                 return -EOPNOTSUPP;
2967         if (sa->sa_family != dev->type)
2968                 return -EINVAL;
2969         if (!netif_device_present(dev))
2970                 return -ENODEV;
2971         err = dev->set_mac_address(dev, sa);
2972         if (!err)
2973                 raw_notifier_call_chain(&netdev_chain,
2974                                 NETDEV_CHANGEADDR, dev);
2975         return err;
2976 }
2977
2978 /*
2979  *      Perform the SIOCxIFxxx calls.
2980  */
2981 static int dev_ifsioc(struct ifreq *ifr, unsigned int cmd)
2982 {
2983         int err;
2984         struct net_device *dev = __dev_get_by_name(ifr->ifr_name);
2985
2986         if (!dev)
2987                 return -ENODEV;
2988
2989         switch (cmd) {
2990                 case SIOCGIFFLAGS:      /* Get interface flags */
2991                         ifr->ifr_flags = dev_get_flags(dev);
2992                         return 0;
2993
2994                 case SIOCSIFFLAGS:      /* Set interface flags */
2995                         return dev_change_flags(dev, ifr->ifr_flags);
2996
2997                 case SIOCGIFMETRIC:     /* Get the metric on the interface
2998                                            (currently unused) */
2999                         ifr->ifr_metric = 0;
3000                         return 0;
3001
3002                 case SIOCSIFMETRIC:     /* Set the metric on the interface
3003                                            (currently unused) */
3004                         return -EOPNOTSUPP;
3005
3006                 case SIOCGIFMTU:        /* Get the MTU of a device */
3007                         ifr->ifr_mtu = dev->mtu;
3008                         return 0;
3009
3010                 case SIOCSIFMTU:        /* Set the MTU of a device */
3011                         return dev_set_mtu(dev, ifr->ifr_mtu);
3012
3013                 case SIOCGIFHWADDR:
3014                         if (!dev->addr_len)
3015                                 memset(ifr->ifr_hwaddr.sa_data, 0, sizeof ifr->ifr_hwaddr.sa_data);
3016                         else
3017                                 memcpy(ifr->ifr_hwaddr.sa_data, dev->dev_addr,
3018                                        min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3019                         ifr->ifr_hwaddr.sa_family = dev->type;
3020                         return 0;
3021
3022                 case SIOCSIFHWADDR:
3023                         return dev_set_mac_address(dev, &ifr->ifr_hwaddr);
3024
3025                 case SIOCSIFHWBROADCAST:
3026                         if (ifr->ifr_hwaddr.sa_family != dev->type)
3027                                 return -EINVAL;
3028                         memcpy(dev->broadcast, ifr->ifr_hwaddr.sa_data,
3029                                min(sizeof ifr->ifr_hwaddr.sa_data, (size_t) dev->addr_len));
3030                         raw_notifier_call_chain(&netdev_chain,
3031                                             NETDEV_CHANGEADDR, dev);
3032                         return 0;
3033
3034                 case SIOCGIFMAP:
3035                         ifr->ifr_map.mem_start = dev->mem_start;
3036                         ifr->ifr_map.mem_end   = dev->mem_end;
3037                         ifr->ifr_map.base_addr = dev->base_addr;
3038                         ifr->ifr_map.irq       = dev->irq;
3039                         ifr->ifr_map.dma       = dev->dma;
3040                         ifr->ifr_map.port      = dev->if_port;
3041                         return 0;
3042
3043                 case SIOCSIFMAP:
3044                         if (dev->set_config) {
3045                                 if (!netif_device_present(dev))
3046                                         return -ENODEV;
3047                                 return dev->set_config(dev, &ifr->ifr_map);
3048                         }
3049                         return -EOPNOTSUPP;
3050
3051                 case SIOCADDMULTI:
3052                         if (!dev->set_multicast_list ||
3053                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3054                                 return -EINVAL;
3055                         if (!netif_device_present(dev))
3056                                 return -ENODEV;
3057                         return dev_mc_add(dev, ifr->ifr_hwaddr.sa_data,
3058                                           dev->addr_len, 1);
3059
3060                 case SIOCDELMULTI:
3061                         if (!dev->set_multicast_list ||
3062                             ifr->ifr_hwaddr.sa_family != AF_UNSPEC)
3063                                 return -EINVAL;
3064                         if (!netif_device_present(dev))
3065                                 return -ENODEV;
3066                         return dev_mc_delete(dev, ifr->ifr_hwaddr.sa_data,
3067                                              dev->addr_len, 1);
3068
3069                 case SIOCGIFINDEX:
3070                         ifr->ifr_ifindex = dev->ifindex;
3071                         return 0;
3072
3073                 case SIOCGIFTXQLEN:
3074                         ifr->ifr_qlen = dev->tx_queue_len;
3075                         return 0;
3076
3077                 case SIOCSIFTXQLEN:
3078                         if (ifr->ifr_qlen < 0)
3079                                 return -EINVAL;
3080                         dev->tx_queue_len = ifr->ifr_qlen;
3081                         return 0;
3082
3083                 case SIOCSIFNAME:
3084                         ifr->ifr_newname[IFNAMSIZ-1] = '\0';
3085                         return dev_change_name(dev, ifr->ifr_newname);
3086
3087                 /*
3088                  *      Unknown or private ioctl
3089                  */
3090
3091                 default:
3092                         if ((cmd >= SIOCDEVPRIVATE &&
3093                             cmd <= SIOCDEVPRIVATE + 15) ||
3094                             cmd == SIOCBONDENSLAVE ||
3095                             cmd == SIOCBONDRELEASE ||
3096                             cmd == SIOCBONDSETHWADDR ||
3097                             cmd == SIOCBONDSLAVEINFOQUERY ||
3098                             cmd == SIOCBONDINFOQUERY ||
3099                             cmd == SIOCBONDCHANGEACTIVE ||
3100                             cmd == SIOCGMIIPHY ||
3101                             cmd == SIOCGMIIREG ||
3102                             cmd == SIOCSMIIREG ||
3103                             cmd == SIOCBRADDIF ||
3104                             cmd == SIOCBRDELIF ||
3105                             cmd == SIOCWANDEV) {
3106                                 err = -EOPNOTSUPP;
3107                                 if (dev->do_ioctl) {
3108                                         if (netif_device_present(dev))
3109                                                 err = dev->do_ioctl(dev, ifr,
3110                                                                     cmd);
3111                                         else
3112                                                 err = -ENODEV;
3113                                 }
3114                         } else
3115                                 err = -EINVAL;
3116
3117         }
3118         return err;
3119 }
3120
3121 /*
3122  *      This function handles all "interface"-type I/O control requests. The actual
3123  *      'doing' part of this is dev_ifsioc above.
3124  */
3125
3126 /**
3127  *      dev_ioctl       -       network device ioctl
3128  *      @cmd: command to issue
3129  *      @arg: pointer to a struct ifreq in user space
3130  *
3131  *      Issue ioctl functions to devices. This is normally called by the
3132  *      user space syscall interfaces but can sometimes be useful for
3133  *      other purposes. The return value is the return from the syscall if
3134  *      positive or a negative errno code on error.
3135  */
3136
3137 int dev_ioctl(unsigned int cmd, void __user *arg)
3138 {
3139         struct ifreq ifr;
3140         int ret;
3141         char *colon;
3142
3143         /* One special case: SIOCGIFCONF takes ifconf argument
3144            and requires shared lock, because it sleeps writing
3145            to user space.
3146          */
3147
3148         if (cmd == SIOCGIFCONF) {
3149                 rtnl_lock();
3150                 ret = dev_ifconf((char __user *) arg);
3151                 rtnl_unlock();
3152                 return ret;
3153         }
3154         if (cmd == SIOCGIFNAME)
3155                 return dev_ifname((struct ifreq __user *)arg);
3156
3157         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
3158                 return -EFAULT;
3159
3160         ifr.ifr_name[IFNAMSIZ-1] = 0;
3161
3162         colon = strchr(ifr.ifr_name, ':');
3163         if (colon)
3164                 *colon = 0;
3165
3166         /*
3167          *      See which interface the caller is talking about.
3168          */
3169
3170         switch (cmd) {
3171                 /*
3172                  *      These ioctl calls:
3173                  *      - can be done by all.
3174                  *      - atomic and do not require locking.
3175                  *      - return a value
3176                  */
3177                 case SIOCGIFFLAGS:
3178                 case SIOCGIFMETRIC:
3179                 case SIOCGIFMTU:
3180                 case SIOCGIFHWADDR:
3181                 case SIOCGIFSLAVE:
3182                 case SIOCGIFMAP:
3183                 case SIOCGIFINDEX:
3184                 case SIOCGIFTXQLEN:
3185                         dev_load(ifr.ifr_name);
3186                         read_lock(&dev_base_lock);
3187                         ret = dev_ifsioc(&ifr, cmd);
3188                         read_unlock(&dev_base_lock);
3189                         if (!ret) {
3190                                 if (colon)
3191                                         *colon = ':';
3192                                 if (copy_to_user(arg, &ifr,
3193                                                  sizeof(struct ifreq)))
3194                                         ret = -EFAULT;
3195                         }
3196                         return ret;
3197
3198                 case SIOCETHTOOL:
3199                         dev_load(ifr.ifr_name);
3200                         rtnl_lock();
3201                         ret = dev_ethtool(&ifr);
3202                         rtnl_unlock();
3203                         if (!ret) {
3204                                 if (colon)
3205                                         *colon = ':';
3206                                 if (copy_to_user(arg, &ifr,
3207                                                  sizeof(struct ifreq)))
3208                                         ret = -EFAULT;
3209                         }
3210                         return ret;
3211
3212                 /*
3213                  *      These ioctl calls:
3214                  *      - require superuser power.
3215                  *      - require strict serialization.
3216                  *      - return a value
3217                  */
3218                 case SIOCGMIIPHY:
3219                 case SIOCGMIIREG:
3220                 case SIOCSIFNAME:
3221                         if (!capable(CAP_NET_ADMIN))
3222                                 return -EPERM;
3223                         dev_load(ifr.ifr_name);
3224                         rtnl_lock();
3225                         ret = dev_ifsioc(&ifr, cmd);
3226                         rtnl_unlock();
3227                         if (!ret) {
3228                                 if (colon)
3229                                         *colon = ':';
3230                                 if (copy_to_user(arg, &ifr,
3231                                                  sizeof(struct ifreq)))
3232                                         ret = -EFAULT;
3233                         }
3234                         return ret;
3235
3236                 /*
3237                  *      These ioctl calls:
3238                  *      - require superuser power.
3239                  *      - require strict serialization.
3240                  *      - do not return a value
3241                  */
3242                 case SIOCSIFFLAGS:
3243                 case SIOCSIFMETRIC:
3244                 case SIOCSIFMTU:
3245                 case SIOCSIFMAP:
3246                 case SIOCSIFHWADDR:
3247                 case SIOCSIFSLAVE:
3248                 case SIOCADDMULTI:
3249                 case SIOCDELMULTI:
3250                 case SIOCSIFHWBROADCAST:
3251                 case SIOCSIFTXQLEN:
3252                 case SIOCSMIIREG:
3253                 case SIOCBONDENSLAVE:
3254                 case SIOCBONDRELEASE:
3255                 case SIOCBONDSETHWADDR:
3256                 case SIOCBONDCHANGEACTIVE:
3257                 case SIOCBRADDIF:
3258                 case SIOCBRDELIF:
3259                         if (!capable(CAP_NET_ADMIN))
3260                                 return -EPERM;
3261                         /* fall through */
3262                 case SIOCBONDSLAVEINFOQUERY:
3263                 case SIOCBONDINFOQUERY:
3264                         dev_load(ifr.ifr_name);
3265                         rtnl_lock();
3266                         ret = dev_ifsioc(&ifr, cmd);
3267                         rtnl_unlock();
3268                         return ret;
3269
3270                 case SIOCGIFMEM:
3271                         /* Get the per device memory space. We can add this but
3272                          * currently do not support it */
3273                 case SIOCSIFMEM:
3274                         /* Set the per device memory buffer space.
3275                          * Not applicable in our case */
3276                 case SIOCSIFLINK:
3277                         return -EINVAL;
3278
3279                 /*
3280                  *      Unknown or private ioctl.
3281                  */
3282                 default:
3283                         if (cmd == SIOCWANDEV ||
3284                             (cmd >= SIOCDEVPRIVATE &&
3285                              cmd <= SIOCDEVPRIVATE + 15)) {
3286                                 dev_load(ifr.ifr_name);
3287                                 rtnl_lock();
3288                                 ret = dev_ifsioc(&ifr, cmd);
3289                                 rtnl_unlock();
3290                                 if (!ret && copy_to_user(arg, &ifr,
3291                                                          sizeof(struct ifreq)))
3292                                         ret = -EFAULT;
3293                                 return ret;
3294                         }
3295                         /* Take care of Wireless Extensions */
3296                         if (cmd >= SIOCIWFIRST && cmd <= SIOCIWLAST)
3297                                 return wext_handle_ioctl(&ifr, cmd, arg);
3298                         return -EINVAL;
3299         }
3300 }
3301
3302
3303 /**
3304  *      dev_new_index   -       allocate an ifindex
3305  *
3306  *      Returns a suitable unique value for a new device interface
3307  *      number.  The caller must hold the rtnl semaphore or the
3308  *      dev_base_lock to be sure it remains unique.
3309  */
3310 static int dev_new_index(void)
3311 {
3312         static int ifindex;
3313         for (;;) {
3314                 if (++ifindex <= 0)
3315                         ifindex = 1;
3316                 if (!__dev_get_by_index(ifindex))
3317                         return ifindex;
3318         }
3319 }
3320
3321 static int dev_boot_phase = 1;
3322
3323 /* Delayed registration/unregisteration */
3324 static DEFINE_SPINLOCK(net_todo_list_lock);
3325 static struct list_head net_todo_list = LIST_HEAD_INIT(net_todo_list);
3326
3327 static void net_set_todo(struct net_device *dev)
3328 {
3329         spin_lock(&net_todo_list_lock);
3330         list_add_tail(&dev->todo_list, &net_todo_list);
3331         spin_unlock(&net_todo_list_lock);
3332 }
3333
3334 /**
3335  *      register_netdevice      - register a network device
3336  *      @dev: device to register
3337  *
3338  *      Take a completed network device structure and add it to the kernel
3339  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3340  *      chain. 0 is returned on success. A negative errno code is returned
3341  *      on a failure to set up the device, or if the name is a duplicate.
3342  *
3343  *      Callers must hold the rtnl semaphore. You may want
3344  *      register_netdev() instead of this.
3345  *
3346  *      BUGS:
3347  *      The locking appears insufficient to guarantee two parallel registers
3348  *      will not get the same name.
3349  */
3350
3351 int register_netdevice(struct net_device *dev)
3352 {
3353         struct hlist_head *head;
3354         struct hlist_node *p;
3355         int ret;
3356
3357         BUG_ON(dev_boot_phase);
3358         ASSERT_RTNL();
3359
3360         might_sleep();
3361
3362         /* When net_device's are persistent, this will be fatal. */
3363         BUG_ON(dev->reg_state != NETREG_UNINITIALIZED);
3364
3365         spin_lock_init(&dev->queue_lock);
3366         spin_lock_init(&dev->_xmit_lock);
3367         netdev_set_lockdep_class(&dev->_xmit_lock, dev->type);
3368         dev->xmit_lock_owner = -1;
3369         spin_lock_init(&dev->ingress_lock);
3370
3371         dev->iflink = -1;
3372
3373         /* Init, if this function is available */
3374         if (dev->init) {
3375                 ret = dev->init(dev);
3376                 if (ret) {
3377                         if (ret > 0)
3378                                 ret = -EIO;
3379                         goto out;
3380                 }
3381         }
3382
3383         if (!dev_valid_name(dev->name)) {
3384                 ret = -EINVAL;
3385                 goto err_uninit;
3386         }
3387
3388         dev->ifindex = dev_new_index();
3389         if (dev->iflink == -1)
3390                 dev->iflink = dev->ifindex;
3391
3392         /* Check for existence of name */
3393         head = dev_name_hash(dev->name);
3394         hlist_for_each(p, head) {
3395                 struct net_device *d
3396                         = hlist_entry(p, struct net_device, name_hlist);
3397                 if (!strncmp(d->name, dev->name, IFNAMSIZ)) {
3398                         ret = -EEXIST;
3399                         goto err_uninit;
3400                 }
3401         }
3402
3403         /* Fix illegal checksum combinations */
3404         if ((dev->features & NETIF_F_HW_CSUM) &&
3405             (dev->features & (NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3406                 printk(KERN_NOTICE "%s: mixed HW and IP checksum settings.\n",
3407                        dev->name);
3408                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM);
3409         }
3410
3411         if ((dev->features & NETIF_F_NO_CSUM) &&
3412             (dev->features & (NETIF_F_HW_CSUM|NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM))) {
3413                 printk(KERN_NOTICE "%s: mixed no checksumming and other settings.\n",
3414                        dev->name);
3415                 dev->features &= ~(NETIF_F_IP_CSUM|NETIF_F_IPV6_CSUM|NETIF_F_HW_CSUM);
3416         }
3417
3418
3419         /* Fix illegal SG+CSUM combinations. */
3420         if ((dev->features & NETIF_F_SG) &&
3421             !(dev->features & NETIF_F_ALL_CSUM)) {
3422                 printk(KERN_NOTICE "%s: Dropping NETIF_F_SG since no checksum feature.\n",
3423                        dev->name);
3424                 dev->features &= ~NETIF_F_SG;
3425         }
3426
3427         /* TSO requires that SG is present as well. */
3428         if ((dev->features & NETIF_F_TSO) &&
3429             !(dev->features & NETIF_F_SG)) {
3430                 printk(KERN_NOTICE "%s: Dropping NETIF_F_TSO since no SG feature.\n",
3431                        dev->name);
3432                 dev->features &= ~NETIF_F_TSO;
3433         }
3434         if (dev->features & NETIF_F_UFO) {
3435                 if (!(dev->features & NETIF_F_HW_CSUM)) {
3436                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3437                                         "NETIF_F_HW_CSUM feature.\n",
3438                                                         dev->name);
3439                         dev->features &= ~NETIF_F_UFO;
3440                 }
3441                 if (!(dev->features & NETIF_F_SG)) {
3442                         printk(KERN_ERR "%s: Dropping NETIF_F_UFO since no "
3443                                         "NETIF_F_SG feature.\n",
3444                                         dev->name);
3445                         dev->features &= ~NETIF_F_UFO;
3446                 }
3447         }
3448
3449         /*
3450          *      nil rebuild_header routine,
3451          *      that should be never called and used as just bug trap.
3452          */
3453
3454         if (!dev->rebuild_header)
3455                 dev->rebuild_header = default_rebuild_header;
3456
3457         ret = netdev_register_sysfs(dev);
3458         if (ret)
3459                 goto err_uninit;
3460         dev->reg_state = NETREG_REGISTERED;
3461
3462         /*
3463          *      Default initial state at registry is that the
3464          *      device is present.
3465          */
3466
3467         set_bit(__LINK_STATE_PRESENT, &dev->state);
3468
3469         dev_init_scheduler(dev);
3470         write_lock_bh(&dev_base_lock);
3471         list_add_tail(&dev->dev_list, &dev_base_head);
3472         hlist_add_head(&dev->name_hlist, head);
3473         hlist_add_head(&dev->index_hlist, dev_index_hash(dev->ifindex));
3474         dev_hold(dev);
3475         write_unlock_bh(&dev_base_lock);
3476
3477         /* Notify protocols, that a new device appeared. */
3478         ret = raw_notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
3479         ret = notifier_to_errno(ret);
3480         if (ret)
3481                 unregister_netdevice(dev);
3482
3483 out:
3484         return ret;
3485
3486 err_uninit:
3487         if (dev->uninit)
3488                 dev->uninit(dev);
3489         goto out;
3490 }
3491
3492 /**
3493  *      register_netdev - register a network device
3494  *      @dev: device to register
3495  *
3496  *      Take a completed network device structure and add it to the kernel
3497  *      interfaces. A %NETDEV_REGISTER message is sent to the netdev notifier
3498  *      chain. 0 is returned on success. A negative errno code is returned
3499  *      on a failure to set up the device, or if the name is a duplicate.
3500  *
3501  *      This is a wrapper around register_netdevice that takes the rtnl semaphore
3502  *      and expands the device name if you passed a format string to
3503  *      alloc_netdev.
3504  */
3505 int register_netdev(struct net_device *dev)
3506 {
3507         int err;
3508
3509         rtnl_lock();
3510
3511         /*
3512          * If the name is a format string the caller wants us to do a
3513          * name allocation.
3514          */
3515         if (strchr(dev->name, '%')) {
3516                 err = dev_alloc_name(dev, dev->name);
3517                 if (err < 0)
3518                         goto out;
3519         }
3520
3521         err = register_netdevice(dev);
3522 out:
3523         rtnl_unlock();
3524         return err;
3525 }
3526 EXPORT_SYMBOL(register_netdev);
3527
3528 /*
3529  * netdev_wait_allrefs - wait until all references are gone.
3530  *
3531  * This is called when unregistering network devices.
3532  *
3533  * Any protocol or device that holds a reference should register
3534  * for netdevice notification, and cleanup and put back the
3535  * reference if they receive an UNREGISTER event.
3536  * We can get stuck here if buggy protocols don't correctly
3537  * call dev_put.
3538  */
3539 static void netdev_wait_allrefs(struct net_device *dev)
3540 {
3541         unsigned long rebroadcast_time, warning_time;
3542
3543         rebroadcast_time = warning_time = jiffies;
3544         while (atomic_read(&dev->refcnt) != 0) {
3545                 if (time_after(jiffies, rebroadcast_time + 1 * HZ)) {
3546                         rtnl_lock();
3547
3548                         /* Rebroadcast unregister notification */
3549                         raw_notifier_call_chain(&netdev_chain,
3550                                             NETDEV_UNREGISTER, dev);
3551
3552                         if (test_bit(__LINK_STATE_LINKWATCH_PENDING,
3553                                      &dev->state)) {
3554                                 /* We must not have linkwatch events
3555                                  * pending on unregister. If this
3556                                  * happens, we simply run the queue
3557                                  * unscheduled, resulting in a noop
3558                                  * for this device.
3559                                  */
3560                                 linkwatch_run_queue();
3561                         }
3562
3563                         __rtnl_unlock();
3564
3565                         rebroadcast_time = jiffies;
3566                 }
3567
3568                 msleep(250);
3569
3570                 if (time_after(jiffies, warning_time + 10 * HZ)) {
3571                         printk(KERN_EMERG "unregister_netdevice: "
3572                                "waiting for %s to become free. Usage "
3573                                "count = %d\n",
3574                                dev->name, atomic_read(&dev->refcnt));
3575                         warning_time = jiffies;
3576                 }
3577         }
3578 }
3579
3580 /* The sequence is:
3581  *
3582  *      rtnl_lock();
3583  *      ...
3584  *      register_netdevice(x1);
3585  *      register_netdevice(x2);
3586  *      ...
3587  *      unregister_netdevice(y1);
3588  *      unregister_netdevice(y2);
3589  *      ...
3590  *      rtnl_unlock();
3591  *      free_netdev(y1);
3592  *      free_netdev(y2);
3593  *
3594  * We are invoked by rtnl_unlock() after it drops the semaphore.
3595  * This allows us to deal with problems:
3596  * 1) We can delete sysfs objects which invoke hotplug
3597  *    without deadlocking with linkwatch via keventd.
3598  * 2) Since we run with the RTNL semaphore not held, we can sleep
3599  *    safely in order to wait for the netdev refcnt to drop to zero.
3600  */
3601 static DEFINE_MUTEX(net_todo_run_mutex);
3602 void netdev_run_todo(void)
3603 {
3604         struct list_head list;
3605
3606         /* Need to guard against multiple cpu's getting out of order. */
3607         mutex_lock(&net_todo_run_mutex);
3608
3609         /* Not safe to do outside the semaphore.  We must not return
3610          * until all unregister events invoked by the local processor
3611          * have been completed (either by this todo run, or one on
3612          * another cpu).
3613          */
3614         if (list_empty(&net_todo_list))
3615                 goto out;
3616
3617         /* Snapshot list, allow later requests */
3618         spin_lock(&net_todo_list_lock);
3619         list_replace_init(&net_todo_list, &list);
3620         spin_unlock(&net_todo_list_lock);
3621
3622         while (!list_empty(&list)) {
3623                 struct net_device *dev
3624                         = list_entry(list.next, struct net_device, todo_list);
3625                 list_del(&dev->todo_list);
3626
3627                 if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) {
3628                         printk(KERN_ERR "network todo '%s' but state %d\n",
3629                                dev->name, dev->reg_state);
3630                         dump_stack();
3631                         continue;
3632                 }
3633
3634                 dev->reg_state = NETREG_UNREGISTERED;
3635
3636                 netdev_wait_allrefs(dev);
3637
3638                 /* paranoia */
3639                 BUG_ON(atomic_read(&dev->refcnt));
3640                 BUG_TRAP(!dev->ip_ptr);
3641                 BUG_TRAP(!dev->ip6_ptr);
3642                 BUG_TRAP(!dev->dn_ptr);
3643
3644                 if (dev->destructor)
3645                         dev->destructor(dev);
3646
3647                 /* Free network device */
3648                 kobject_put(&dev->dev.kobj);
3649         }
3650
3651 out:
3652         mutex_unlock(&net_todo_run_mutex);
3653 }
3654
3655 static struct net_device_stats *internal_stats(struct net_device *dev)
3656 {
3657         return &dev->stats;
3658 }
3659
3660 /**
3661  *      alloc_netdev_mq - allocate network device
3662  *      @sizeof_priv:   size of private data to allocate space for
3663  *      @name:          device name format string
3664  *      @setup:         callback to initialize device
3665  *      @queue_count:   the number of subqueues to allocate
3666  *
3667  *      Allocates a struct net_device with private data area for driver use
3668  *      and performs basic initialization.  Also allocates subquue structs
3669  *      for each queue on the device at the end of the netdevice.
3670  */
3671 struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
3672                 void (*setup)(struct net_device *), unsigned int queue_count)
3673 {
3674         void *p;
3675         struct net_device *dev;
3676         int alloc_size;
3677
3678         BUG_ON(strlen(name) >= sizeof(dev->name));
3679
3680         /* ensure 32-byte alignment of both the device and private area */
3681         alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST +
3682                      (sizeof(struct net_device_subqueue) * (queue_count - 1))) &
3683                      ~NETDEV_ALIGN_CONST;
3684         alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
3685
3686         p = kzalloc(alloc_size, GFP_KERNEL);
3687         if (!p) {
3688                 printk(KERN_ERR "alloc_netdev: Unable to allocate device.\n");
3689                 return NULL;
3690         }
3691
3692         dev = (struct net_device *)
3693                 (((long)p + NETDEV_ALIGN_CONST) & ~NETDEV_ALIGN_CONST);
3694         dev->padded = (char *)dev - (char *)p;
3695
3696         if (sizeof_priv) {
3697                 dev->priv = ((char *)dev +
3698                              ((sizeof(struct net_device) +
3699                                (sizeof(struct net_device_subqueue) *
3700                                 (queue_count - 1)) + NETDEV_ALIGN_CONST)
3701                               & ~NETDEV_ALIGN_CONST));
3702         }
3703
3704         dev->egress_subqueue_count = queue_count;
3705
3706         dev->get_stats = internal_stats;
3707         setup(dev);
3708         strcpy(dev->name, name);
3709         return dev;
3710 }
3711 EXPORT_SYMBOL(alloc_netdev_mq);
3712
3713 /**
3714  *      free_netdev - free network device
3715  *      @dev: device
3716  *
3717  *      This function does the last stage of destroying an allocated device
3718  *      interface. The reference to the device object is released.
3719  *      If this is the last reference then it will be freed.
3720  */
3721 void free_netdev(struct net_device *dev)
3722 {
3723 #ifdef CONFIG_SYSFS
3724         /*  Compatibility with error handling in drivers */
3725         if (dev->reg_state == NETREG_UNINITIALIZED) {
3726                 kfree((char *)dev - dev->padded);
3727                 return;
3728         }
3729
3730         BUG_ON(dev->reg_state != NETREG_UNREGISTERED);
3731         dev->reg_state = NETREG_RELEASED;
3732
3733         /* will free via device release */
3734         put_device(&dev->dev);
3735 #else
3736         kfree((char *)dev - dev->padded);
3737 #endif
3738 }
3739
3740 /* Synchronize with packet receive processing. */
3741 void synchronize_net(void)
3742 {
3743         might_sleep();
3744         synchronize_rcu();
3745 }
3746
3747 /**
3748  *      unregister_netdevice - remove device from the kernel
3749  *      @dev: device
3750  *
3751  *      This function shuts down a device interface and removes it
3752  *      from the kernel tables. On success 0 is returned, on a failure
3753  *      a negative errno code is returned.
3754  *
3755  *      Callers must hold the rtnl semaphore.  You may want
3756  *      unregister_netdev() instead of this.
3757  */
3758
3759 void unregister_netdevice(struct net_device *dev)
3760 {
3761         BUG_ON(dev_boot_phase);
3762         ASSERT_RTNL();
3763
3764         /* Some devices call without registering for initialization unwind. */
3765         if (dev->reg_state == NETREG_UNINITIALIZED) {
3766                 printk(KERN_DEBUG "unregister_netdevice: device %s/%p never "
3767                                   "was registered\n", dev->name, dev);
3768
3769                 WARN_ON(1);
3770                 return;
3771         }
3772
3773         BUG_ON(dev->reg_state != NETREG_REGISTERED);
3774
3775         /* If device is running, close it first. */
3776         if (dev->flags & IFF_UP)
3777                 dev_close(dev);
3778
3779         /* And unlink it from device chain. */
3780         write_lock_bh(&dev_base_lock);
3781         list_del(&dev->dev_list);
3782         hlist_del(&dev->name_hlist);
3783         hlist_del(&dev->index_hlist);
3784         write_unlock_bh(&dev_base_lock);
3785
3786         dev->reg_state = NETREG_UNREGISTERING;
3787
3788         synchronize_net();
3789
3790         /* Shutdown queueing discipline. */
3791         dev_shutdown(dev);
3792
3793
3794         /* Notify protocols, that we are about to destroy
3795            this device. They should clean all the things.
3796         */
3797         raw_notifier_call_chain(&netdev_chain, NETDEV_UNREGISTER, dev);
3798
3799         /*
3800          *      Flush the unicast and multicast chains
3801          */
3802         dev_addr_discard(dev);
3803
3804         if (dev->uninit)
3805                 dev->uninit(dev);
3806
3807         /* Notifier chain MUST detach us from master device. */
3808         BUG_TRAP(!dev->master);
3809
3810         /* Remove entries from sysfs */
3811         netdev_unregister_sysfs(dev);
3812
3813         /* Finish processing unregister after unlock */
3814         net_set_todo(dev);
3815
3816         synchronize_net();
3817
3818         dev_put(dev);
3819 }
3820
3821 /**
3822  *      unregister_netdev - remove device from the kernel
3823  *      @dev: device
3824  *
3825  *      This function shuts down a device interface and removes it
3826  *      from the kernel tables. On success 0 is returned, on a failure
3827  *      a negative errno code is returned.
3828  *
3829  *      This is just a wrapper for unregister_netdevice that takes
3830  *      the rtnl semaphore.  In general you want to use this and not
3831  *      unregister_netdevice.
3832  */
3833 void unregister_netdev(struct net_device *dev)
3834 {
3835         rtnl_lock();
3836         unregister_netdevice(dev);
3837         rtnl_unlock();
3838 }
3839
3840 EXPORT_SYMBOL(unregister_netdev);
3841
3842 static int dev_cpu_callback(struct notifier_block *nfb,
3843                             unsigned long action,
3844                             void *ocpu)
3845 {
3846         struct sk_buff **list_skb;
3847         struct net_device **list_net;
3848         struct sk_buff *skb;
3849         unsigned int cpu, oldcpu = (unsigned long)ocpu;
3850         struct softnet_data *sd, *oldsd;
3851
3852         if (action != CPU_DEAD && action != CPU_DEAD_FROZEN)
3853                 return NOTIFY_OK;
3854
3855         local_irq_disable();
3856         cpu = smp_processor_id();
3857         sd = &per_cpu(softnet_data, cpu);
3858         oldsd = &per_cpu(softnet_data, oldcpu);
3859
3860         /* Find end of our completion_queue. */
3861         list_skb = &sd->completion_queue;
3862         while (*list_skb)
3863                 list_skb = &(*list_skb)->next;
3864         /* Append completion queue from offline CPU. */
3865         *list_skb = oldsd->completion_queue;
3866         oldsd->completion_queue = NULL;
3867
3868         /* Find end of our output_queue. */
3869         list_net = &sd->output_queue;
3870         while (*list_net)
3871                 list_net = &(*list_net)->next_sched;
3872         /* Append output queue from offline CPU. */
3873         *list_net = oldsd->output_queue;
3874         oldsd->output_queue = NULL;
3875
3876         raise_softirq_irqoff(NET_TX_SOFTIRQ);
3877         local_irq_enable();
3878
3879         /* Process offline CPU's input_pkt_queue */
3880         while ((skb = __skb_dequeue(&oldsd->input_pkt_queue)))
3881                 netif_rx(skb);
3882
3883         return NOTIFY_OK;
3884 }
3885
3886 #ifdef CONFIG_NET_DMA
3887 /**
3888  * net_dma_rebalance - try to maintain one DMA channel per CPU
3889  * @net_dma: DMA client and associated data (lock, channels, channel_mask)
3890  *
3891  * This is called when the number of channels allocated to the net_dma client
3892  * changes.  The net_dma client tries to have one DMA channel per CPU.
3893  */
3894
3895 static void net_dma_rebalance(struct net_dma *net_dma)
3896 {
3897         unsigned int cpu, i, n, chan_idx;
3898         struct dma_chan *chan;
3899
3900         if (cpus_empty(net_dma->channel_mask)) {
3901                 for_each_online_cpu(cpu)
3902                         rcu_assign_pointer(per_cpu(softnet_data, cpu).net_dma, NULL);
3903                 return;
3904         }
3905
3906         i = 0;
3907         cpu = first_cpu(cpu_online_map);
3908
3909         for_each_cpu_mask(chan_idx, net_dma->channel_mask) {
3910                 chan = net_dma->channels[chan_idx];
3911
3912                 n = ((num_online_cpus() / cpus_weight(net_dma->channel_mask))
3913                    + (i < (num_online_cpus() %
3914                         cpus_weight(net_dma->channel_mask)) ? 1 : 0));
3915
3916                 while(n) {
3917                         per_cpu(softnet_data, cpu).net_dma = chan;
3918                         cpu = next_cpu(cpu, cpu_online_map);
3919                         n--;
3920                 }
3921                 i++;
3922         }
3923 }
3924
3925 /**
3926  * netdev_dma_event - event callback for the net_dma_client
3927  * @client: should always be net_dma_client
3928  * @chan: DMA channel for the event
3929  * @state: DMA state to be handled
3930  */
3931 static enum dma_state_client
3932 netdev_dma_event(struct dma_client *client, struct dma_chan *chan,
3933         enum dma_state state)
3934 {
3935         int i, found = 0, pos = -1;
3936         struct net_dma *net_dma =
3937                 container_of(client, struct net_dma, client);
3938         enum dma_state_client ack = DMA_DUP; /* default: take no action */
3939
3940         spin_lock(&net_dma->lock);
3941         switch (state) {
3942         case DMA_RESOURCE_AVAILABLE:
3943                 for (i = 0; i < NR_CPUS; i++)
3944                         if (net_dma->channels[i] == chan) {
3945                                 found = 1;
3946                                 break;
3947                         } else if (net_dma->channels[i] == NULL && pos < 0)
3948                                 pos = i;
3949
3950                 if (!found && pos >= 0) {
3951                         ack = DMA_ACK;
3952                         net_dma->channels[pos] = chan;
3953                         cpu_set(pos, net_dma->channel_mask);
3954                         net_dma_rebalance(net_dma);
3955                 }
3956                 break;
3957         case DMA_RESOURCE_REMOVED:
3958                 for (i = 0; i < NR_CPUS; i++)
3959                         if (net_dma->channels[i] == chan) {
3960                                 found = 1;
3961                                 pos = i;
3962                                 break;
3963                         }
3964
3965                 if (found) {
3966                         ack = DMA_ACK;
3967                         cpu_clear(pos, net_dma->channel_mask);
3968                         net_dma->channels[i] = NULL;
3969                         net_dma_rebalance(net_dma);
3970                 }
3971                 break;
3972         default:
3973                 break;
3974         }
3975         spin_unlock(&net_dma->lock);
3976
3977         return ack;
3978 }
3979
3980 /**
3981  * netdev_dma_regiser - register the networking subsystem as a DMA client
3982  */
3983 static int __init netdev_dma_register(void)
3984 {
3985         spin_lock_init(&net_dma.lock);
3986         dma_cap_set(DMA_MEMCPY, net_dma.client.cap_mask);
3987         dma_async_client_register(&net_dma.client);
3988         dma_async_client_chan_request(&net_dma.client);
3989         return 0;
3990 }
3991
3992 #else
3993 static int __init netdev_dma_register(void) { return -ENODEV; }
3994 #endif /* CONFIG_NET_DMA */
3995
3996 /**
3997  *      netdev_compute_feature - compute conjunction of two feature sets
3998  *      @all: first feature set
3999  *      @one: second feature set
4000  *
4001  *      Computes a new feature set after adding a device with feature set
4002  *      @one to the master device with current feature set @all.  Returns
4003  *      the new feature set.
4004  */
4005 int netdev_compute_features(unsigned long all, unsigned long one)
4006 {
4007         /* if device needs checksumming, downgrade to hw checksumming */
4008         if (all & NETIF_F_NO_CSUM && !(one & NETIF_F_NO_CSUM))
4009                 all ^= NETIF_F_NO_CSUM | NETIF_F_HW_CSUM;
4010
4011         /* if device can't do all checksum, downgrade to ipv4/ipv6 */
4012         if (all & NETIF_F_HW_CSUM && !(one & NETIF_F_HW_CSUM))
4013                 all ^= NETIF_F_HW_CSUM
4014                         | NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM;
4015
4016         if (one & NETIF_F_GSO)
4017                 one |= NETIF_F_GSO_SOFTWARE;
4018         one |= NETIF_F_GSO;
4019
4020         /* If even one device supports robust GSO, enable it for all. */
4021         if (one & NETIF_F_GSO_ROBUST)
4022                 all |= NETIF_F_GSO_ROBUST;
4023
4024         all &= one | NETIF_F_LLTX;
4025
4026         if (!(all & NETIF_F_ALL_CSUM))
4027                 all &= ~NETIF_F_SG;
4028         if (!(all & NETIF_F_SG))
4029                 all &= ~NETIF_F_GSO_MASK;
4030
4031         return all;
4032 }
4033 EXPORT_SYMBOL(netdev_compute_features);
4034
4035 /*
4036  *      Initialize the DEV module. At boot time this walks the device list and
4037  *      unhooks any devices that fail to initialise (normally hardware not
4038  *      present) and leaves us with a valid list of present and active devices.
4039  *
4040  */
4041
4042 /*
4043  *       This is called single threaded during boot, so no need
4044  *       to take the rtnl semaphore.
4045  */
4046 static int __init net_dev_init(void)
4047 {
4048         int i, rc = -ENOMEM;
4049
4050         BUG_ON(!dev_boot_phase);
4051
4052         if (dev_proc_init())
4053                 goto out;
4054
4055         if (netdev_sysfs_init())
4056                 goto out;
4057
4058         INIT_LIST_HEAD(&ptype_all);
4059         for (i = 0; i < 16; i++)
4060                 INIT_LIST_HEAD(&ptype_base[i]);
4061
4062         for (i = 0; i < ARRAY_SIZE(dev_name_head); i++)
4063                 INIT_HLIST_HEAD(&dev_name_head[i]);
4064
4065         for (i = 0; i < ARRAY_SIZE(dev_index_head); i++)
4066                 INIT_HLIST_HEAD(&dev_index_head[i]);
4067
4068         /*
4069          *      Initialise the packet receive queues.
4070          */
4071
4072         for_each_possible_cpu(i) {
4073                 struct softnet_data *queue;
4074
4075                 queue = &per_cpu(softnet_data, i);
4076                 skb_queue_head_init(&queue->input_pkt_queue);
4077                 queue->completion_queue = NULL;
4078                 INIT_LIST_HEAD(&queue->poll_list);
4079                 set_bit(__LINK_STATE_START, &queue->backlog_dev.state);
4080                 queue->backlog_dev.weight = weight_p;
4081                 queue->backlog_dev.poll = process_backlog;
4082                 atomic_set(&queue->backlog_dev.refcnt, 1);
4083         }
4084
4085         netdev_dma_register();
4086
4087         dev_boot_phase = 0;
4088
4089         open_softirq(NET_TX_SOFTIRQ, net_tx_action, NULL);
4090         open_softirq(NET_RX_SOFTIRQ, net_rx_action, NULL);
4091
4092         hotcpu_notifier(dev_cpu_callback, 0);
4093         dst_init();
4094         dev_mcast_init();
4095         rc = 0;
4096 out:
4097         return rc;
4098 }
4099
4100 subsys_initcall(net_dev_init);
4101
4102 EXPORT_SYMBOL(__dev_get_by_index);
4103 EXPORT_SYMBOL(__dev_get_by_name);
4104 EXPORT_SYMBOL(__dev_remove_pack);
4105 EXPORT_SYMBOL(dev_valid_name);
4106 EXPORT_SYMBOL(dev_add_pack);
4107 EXPORT_SYMBOL(dev_alloc_name);
4108 EXPORT_SYMBOL(dev_close);
4109 EXPORT_SYMBOL(dev_get_by_flags);
4110 EXPORT_SYMBOL(dev_get_by_index);
4111 EXPORT_SYMBOL(dev_get_by_name);
4112 EXPORT_SYMBOL(dev_open);
4113 EXPORT_SYMBOL(dev_queue_xmit);
4114 EXPORT_SYMBOL(dev_remove_pack);
4115 EXPORT_SYMBOL(dev_set_allmulti);
4116 EXPORT_SYMBOL(dev_set_promiscuity);
4117 EXPORT_SYMBOL(dev_change_flags);
4118 EXPORT_SYMBOL(dev_set_mtu);
4119 EXPORT_SYMBOL(dev_set_mac_address);
4120 EXPORT_SYMBOL(free_netdev);
4121 EXPORT_SYMBOL(netdev_boot_setup_check);
4122 EXPORT_SYMBOL(netdev_set_master);
4123 EXPORT_SYMBOL(netdev_state_change);
4124 EXPORT_SYMBOL(netif_receive_skb);
4125 EXPORT_SYMBOL(netif_rx);
4126 EXPORT_SYMBOL(register_gifconf);
4127 EXPORT_SYMBOL(register_netdevice);
4128 EXPORT_SYMBOL(register_netdevice_notifier);
4129 EXPORT_SYMBOL(skb_checksum_help);
4130 EXPORT_SYMBOL(synchronize_net);
4131 EXPORT_SYMBOL(unregister_netdevice);
4132 EXPORT_SYMBOL(unregister_netdevice_notifier);
4133 EXPORT_SYMBOL(net_enable_timestamp);
4134 EXPORT_SYMBOL(net_disable_timestamp);
4135 EXPORT_SYMBOL(dev_get_flags);
4136
4137 #if defined(CONFIG_BRIDGE) || defined(CONFIG_BRIDGE_MODULE)
4138 EXPORT_SYMBOL(br_handle_frame_hook);
4139 EXPORT_SYMBOL(br_fdb_get_hook);
4140 EXPORT_SYMBOL(br_fdb_put_hook);
4141 #endif
4142
4143 #ifdef CONFIG_KMOD
4144 EXPORT_SYMBOL(dev_load);
4145 #endif
4146
4147 EXPORT_PER_CPU_SYMBOL(softnet_data);