patch-2.3.6 linux/net/core/dev.c
- Lines: 498
- Date: Wed Jun 9 14:45:36 1999
- Orig file: v2.3.5/linux/net/core/dev.c
- Orig date: Wed Jun 2 14:44:39 1999
diff -u --recursive --new-file v2.3.5/linux/net/core/dev.c linux/net/core/dev.c
@@ -263,13 +263,13 @@
{
struct device *dev;
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (strcmp(dev->name, name) == 0)
goto out;
}
out:
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
return dev;
}
@@ -277,13 +277,13 @@
{
struct device *dev;
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->ifindex == ifindex)
goto out;
}
out:
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
return dev;
}
@@ -291,14 +291,14 @@
{
struct device *dev;
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
if (dev->type == type &&
memcmp(dev->dev_addr, ha, dev->addr_len) == 0)
goto out;
}
out:
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
return dev;
}
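The three lookup routines above drop the _bh lock variants: only writers of the device list still need to disable bottom halves, so plain read_lock()/read_unlock() is enough on the read side. Below is a minimal, runnable userspace sketch of the same read-side pattern, with pthread_rwlock_t standing in for the kernel's rwlock_t; the struct, list, and function names mirror the kernel but the code is an illustration, not kernel source.

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    struct device {
        char name[16];
        struct device *next;
    };

    static struct device *dev_base;
    static pthread_rwlock_t dev_base_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Walk the list under the read lock only; concurrent writers take
     * the write side (with bottom halves disabled, in the kernel)
     * around their updates. */
    static struct device *dev_get(const char *name)
    {
        struct device *dev;

        pthread_rwlock_rdlock(&dev_base_lock);
        for (dev = dev_base; dev != NULL; dev = dev->next)
            if (strcmp(dev->name, name) == 0)
                break;
        pthread_rwlock_unlock(&dev_base_lock);
        return dev;
    }

    int main(void)
    {
        static struct device lo = { "lo", NULL };
        dev_base = &lo;
        printf("%s\n", dev_get("lo") ? "found" : "missing");
        return 0;
    }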
@@ -321,7 +321,7 @@
}
return -ENFILE; /* Over 100 of the things .. bail out! */
}
-
+
struct device *dev_alloc(const char *name, int *err)
{
struct device *dev=kmalloc(sizeof(struct device)+16, GFP_KERNEL);
@@ -387,9 +387,6 @@
if (dev->flags&IFF_UP)
return 0;
- /* Setup the lock before we open the faucet. */
- spin_lock_init(&dev->xmit_lock);
-
/*
* Call device private open method
*/
@@ -452,10 +449,10 @@
if (dev) {
dev_do_clear_fastroute(dev);
} else {
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev; dev = dev->next)
dev_do_clear_fastroute(dev);
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
}
}
#endif
@@ -596,59 +593,61 @@
struct device *dev = skb->dev;
struct Qdisc *q;
-#ifdef CONFIG_NET_PROFILE
- start_bh_atomic();
- NET_PROFILE_ENTER(dev_queue_xmit);
-#endif
-
- spin_lock_bh(&dev->xmit_lock);
+ /* Grab the device queue lock. */
+ spin_lock_bh(&dev->queue_lock);
q = dev->qdisc;
if (q->enqueue) {
q->enqueue(skb, q);
- qdisc_wakeup(dev);
- spin_unlock_bh(&dev->xmit_lock);
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
+ /* If the device is not busy, kick it.
+ * If it is busy, or the queue is still non-empty after
+ * the kick, add it to the run list.
+ */
+ if (dev->tbusy || qdisc_restart(dev))
+ qdisc_run(dev->qdisc);
+ spin_unlock_bh(&dev->queue_lock);
return 0;
}
+ spin_unlock_bh(&dev->queue_lock);
/* The device has no queue. Common case for software devices:
loopback, all the sorts of tunnels...
- Really, it is unlikely that bh protection is necessary here:
- virtual devices do not generate EOI events.
- However, it is possible, that they rely on bh protection
+ Really, it is unlikely that xmit_lock protection is necessary here.
+ (E.g. loopback and IP tunnels are clean, ignoring statistics counters.)
+ However, it is possible that they rely on the protection
made by us here.
+
+ Check this and shoot the lock. It is not prone to deadlocks.
+ Or shoot the noqueue qdisc instead; that is even simpler. 8)
*/
if (dev->flags&IFF_UP) {
if (netdev_nit)
dev_queue_xmit_nit(skb,dev);
- if (dev->hard_start_xmit(skb, dev) == 0) {
- spin_unlock_bh(&dev->xmit_lock);
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
- return 0;
+ local_bh_disable();
+ if (dev->xmit_lock_owner != smp_processor_id()) {
+ spin_lock(&dev->xmit_lock);
+ dev->xmit_lock_owner = smp_processor_id();
+ if (dev->hard_start_xmit(skb, dev) == 0) {
+ dev->xmit_lock_owner = -1;
+ spin_unlock_bh(&dev->xmit_lock);
+ return 0;
+ }
+ dev->xmit_lock_owner = -1;
+ spin_unlock_bh(&dev->xmit_lock);
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
+ } else {
+ /* Recursion detected! It is possible, unfortunately. */
+ local_bh_enable();
+ if (net_ratelimit())
+ printk(KERN_DEBUG "Dead loop on virtual device %s, fix it urgently!\n", dev->name);
}
- if (net_ratelimit())
- printk(KERN_DEBUG "Virtual device %s asks to queue packet!\n", dev->name);
}
- spin_unlock_bh(&dev->xmit_lock);
kfree_skb(skb);
-
-#ifdef CONFIG_NET_PROFILE
- NET_PROFILE_LEAVE(dev_queue_xmit);
- end_bh_atomic();
-#endif
-
return 0;
}
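The rewrite above splits transmission locking in two: dev->queue_lock now guards the qdisc, while dev->xmit_lock serializes calls into hard_start_xmit for queueless (virtual) devices. The new xmit_lock_owner field records which CPU holds xmit_lock, so a virtual device whose transmit routine loops back into dev_queue_xmit is caught by the "dead loop" branch instead of deadlocking on its own spinlock. Below is a runnable userspace sketch of that recursion guard; it substitutes a per-thread flag for the kernel's owner-CPU comparison, and every name in it is illustrative.

    #include <pthread.h>
    #include <stdio.h>

    static pthread_mutex_t xmit_lock = PTHREAD_MUTEX_INITIALIZER;

    /* The kernel stores the owning CPU in dev->xmit_lock_owner and
     * compares it with smp_processor_id(); a per-thread flag answers
     * the same "am I already inside the transmit path?" question. */
    static __thread int in_xmit;

    static int hard_start_xmit(const char *pkt);

    static int queue_xmit(const char *pkt)
    {
        if (in_xmit) {
            /* Recursion detected: the transmit path re-entered itself. */
            fprintf(stderr, "dead loop on virtual device, dropping\n");
            return -1;
        }
        pthread_mutex_lock(&xmit_lock);
        in_xmit = 1;
        int ret = hard_start_xmit(pkt);
        in_xmit = 0;
        pthread_mutex_unlock(&xmit_lock);
        return ret;
    }

    /* A pathological driver that transmits over itself, like a tunnel
     * routed through its own endpoint. */
    static int hard_start_xmit(const char *pkt)
    {
        return queue_xmit(pkt);   /* would deadlock without the guard */
    }

    int main(void)
    {
        queue_xmit("hello");      /* prints the dead-loop warning once */
        return 0;
    }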
@@ -660,9 +659,6 @@
int netdev_dropping = 0;
int netdev_max_backlog = 300;
atomic_t netdev_rx_dropped;
-#ifdef CONFIG_CPU_IS_SLOW
-int net_cpu_congestion;
-#endif
#ifdef CONFIG_NET_HW_FLOWCONTROL
int netdev_throttle_events;
@@ -852,14 +848,6 @@
struct packet_type *pt_prev;
unsigned short type;
unsigned long start_time = jiffies;
-#ifdef CONFIG_CPU_IS_SLOW
- static unsigned long start_busy = 0;
- static unsigned long ave_busy = 0;
-
- if (start_busy == 0)
- start_busy = start_time;
- net_cpu_congestion = ave_busy>>8;
-#endif
NET_PROFILE_ENTER(net_bh);
/*
@@ -869,9 +857,9 @@
* latency on a transmit interrupt bh.
*/
- if (qdisc_head.forw != &qdisc_head)
+ if (qdisc_pending())
qdisc_run_queues();
-
+
/*
* Any data left to process. This may occur because a
* mark_bh() is done after we empty the queue including
@@ -899,19 +887,6 @@
*/
skb = skb_dequeue(&backlog);
-#ifdef CONFIG_CPU_IS_SLOW
- if (ave_busy > 128*16) {
- kfree_skb(skb);
- while ((skb = skb_dequeue(&backlog)) != NULL)
- kfree_skb(skb);
- break;
- }
-#endif
-
-
-#if 0
- NET_PROFILE_SKB_PASSED(skb, net_bh_skb);
-#endif
#ifdef CONFIG_NET_FASTROUTE
if (skb->pkt_type == PACKET_FASTROUTE) {
dev_queue_xmit(skb);
@@ -1022,16 +997,9 @@
* One last output flush.
*/
- if (qdisc_head.forw != &qdisc_head)
+ if (qdisc_pending())
qdisc_run_queues();
-#ifdef CONFIG_CPU_IS_SLOW
- if (1) {
- unsigned long start_idle = jiffies;
- ave_busy += ((start_idle - start_busy)<<3) - (ave_busy>>4);
- start_busy = 0;
- }
-#endif
#ifdef CONFIG_NET_HW_FLOWCONTROL
if (netdev_dropping)
netdev_wakeup();
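Both flushes of the transmit run list now go through a qdisc_pending() helper instead of open-coding the list test. The helper's definition is outside this file; presumably it is nothing more than an inline wrapper over the check it replaces, along these lines (a reconstruction from the old open-coded test, not the actual kernel source):

    #include <stdio.h>

    struct qdisc_head {
        struct qdisc_head *forw;
    };

    /* An empty circular run list points back at its anchor. */
    static struct qdisc_head qdisc_head = { &qdisc_head };

    static inline int qdisc_pending(void)
    {
        return qdisc_head.forw != &qdisc_head;
    }

    int main(void)
    {
        printf("pending: %d\n", qdisc_pending());  /* 0: list is empty */
        return 0;
    }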
@@ -1065,14 +1033,6 @@
*/
/*
- * This call is useful, but I'd remove it too.
- *
- * The reason is purely aestetical, it is the only call
- * from SIOC* family using struct ifreq in reversed manner.
- * Besides that, it is pretty silly to put "drawing" facility
- * to kernel, it is useful only to print ifindices
- * in readable form, is not it? --ANK
- *
* We need this ioctl for efficient implementation of the
* if_indextoname() function required by the IPv6 API. Without
* it, we would have to search all the interfaces to find a
@@ -1138,7 +1098,7 @@
*/
total = 0;
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
for (i=0; i<NPROTO; i++) {
if (gifconf_list[i]) {
@@ -1152,7 +1112,7 @@
}
}
}
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
if(pos != NULL) {
int err = copy_to_user(ifc.ifc_buf, pos, total);
@@ -1232,7 +1192,7 @@
len+=size;
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for (dev = dev_base; dev != NULL; dev = dev->next) {
size = sprintf_stats(buffer+len, dev);
len+=size;
@@ -1245,7 +1205,7 @@
if(pos>offset+length)
break;
}
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
*start=buffer+(offset-begin); /* Start of wanted data */
len-=(offset-begin); /* Start slop */
@@ -1347,7 +1307,7 @@
pos+=size;
len+=size;
- read_lock_bh(&dev_base_lock);
+ read_lock(&dev_base_lock);
for(dev = dev_base; dev != NULL; dev = dev->next) {
size = sprintf_wireless_stats(buffer+len, dev);
len+=size;
@@ -1360,7 +1320,7 @@
if(pos > offset + length)
break;
}
- read_unlock_bh(&dev_base_lock);
+ read_unlock(&dev_base_lock);
*start = buffer + (offset - begin); /* Start of wanted data */
len -= (offset - begin); /* Start slop */
@@ -1736,11 +1696,10 @@
if (IW_IS_SET(cmd)) {
if (!suser())
return -EPERM;
- rtnl_lock();
}
+ rtnl_lock();
ret = dev_ifsioc(&ifr, cmd);
- if (IW_IS_SET(cmd))
- rtnl_unlock();
+ rtnl_unlock();
if (!ret && IW_IS_GET(cmd) &&
copy_to_user(arg, &ifr, sizeof(struct ifreq)))
return -EFAULT;
@@ -1769,6 +1728,10 @@
{
struct device *d, **dp;
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
+
if (dev_boot_phase) {
/* This is NOT a bug, but I am not sure that all the
devices initialized before the netdev module is started
@@ -1784,14 +1747,13 @@
printk(KERN_INFO "early initialization of device %s is deferred\n", dev->name);
/* Check for existence, and append to tail of chain */
- write_lock_bh(&dev_base_lock);
for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
if (d == dev || strcmp(d->name, dev->name) == 0) {
- write_unlock_bh(&dev_base_lock);
return -EEXIST;
}
}
dev->next = NULL;
+ write_lock_bh(&dev_base_lock);
*dp = dev;
write_unlock_bh(&dev_base_lock);
return 0;
@@ -1803,24 +1765,22 @@
if (dev->init && dev->init(dev) != 0)
return -EIO;
+ dev->ifindex = dev_new_index();
+ if (dev->iflink == -1)
+ dev->iflink = dev->ifindex;
+
/* Check for existence, and append to tail of chain */
- write_lock_bh(&dev_base_lock);
for (dp=&dev_base; (d=*dp) != NULL; dp=&d->next) {
if (d == dev || strcmp(d->name, dev->name) == 0) {
- write_unlock_bh(&dev_base_lock);
return -EEXIST;
}
}
dev->next = NULL;
dev_init_scheduler(dev);
+ write_lock_bh(&dev_base_lock);
*dp = dev;
write_unlock_bh(&dev_base_lock);
- dev->ifindex = -1;
- dev->ifindex = dev_new_index();
- if (dev->iflink == -1)
- dev->iflink = dev->ifindex;
-
/* Notify protocols, that a new device appeared. */
notifier_call_chain(&netdev_chain, NETDEV_REGISTER, dev);
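Both register_netdevice paths now initialize queue_lock, xmit_lock and the xmit_lock_owner sentinel before the device becomes reachable, assign the ifindex before the list insertion, and shrink write_lock_bh() down to the single pointer splice; the duplicate-name scan can run unlocked because registration is serialized at a higher level (the RTNL lock outside the boot phase, single-threaded boot code during it). Here is a userspace sketch of that narrowed write side, with a plain mutex standing in for the RTNL semaphore and every identifier illustrative:

    #include <pthread.h>
    #include <stdio.h>
    #include <string.h>

    struct device {
        char name[16];
        struct device *next;
    };

    static struct device *dev_base;
    static pthread_rwlock_t dev_base_lock = PTHREAD_RWLOCK_INITIALIZER;
    static pthread_mutex_t rtnl = PTHREAD_MUTEX_INITIALIZER; /* serializes registrations */

    static int register_dev(struct device *dev)
    {
        struct device *d, **dp;
        int err = 0;

        pthread_mutex_lock(&rtnl);
        /* The scan needs no dev_base_lock: only registrations mutate
         * the list, and they are serialized by the outer mutex. */
        for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
            if (strcmp(d->name, dev->name) == 0) {
                err = -1;       /* -EEXIST in the kernel */
                goto out;
            }
        }
        dev->next = NULL;
        /* Only the splice that readers can observe takes the write lock. */
        pthread_rwlock_wrlock(&dev_base_lock);
        *dp = dev;
        pthread_rwlock_unlock(&dev_base_lock);
    out:
        pthread_mutex_unlock(&rtnl);
        return err;
    }

    int main(void)
    {
        static struct device eth0 = { "eth0", NULL };
        printf("%d %d\n", register_dev(&eth0), register_dev(&eth0)); /* 0 -1 */
        return 0;
    }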
@@ -1831,15 +1791,35 @@
{
struct device *d, **dp;
- if (dev_boot_phase == 0) {
- /* If device is running, close it.
- It is very bad idea, really we should
- complain loudly here, but random hackery
- in linux/drivers/net likes it.
- */
- if (dev->flags & IFF_UP)
- dev_close(dev);
+ /* If device is running, close it first. */
+ if (dev->flags & IFF_UP)
+ dev_close(dev);
+ /* And unlink it from device chain. */
+ for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
+ if (d == dev) {
+ write_lock_bh(&dev_base_lock);
+ *dp = d->next;
+ write_unlock_bh(&dev_base_lock);
+
+ /* Sorry, this is a known "feature". The race is real;
+ it stays until device reference counting is
+ complete.
+ */
+ synchronize_bh();
+ break;
+ }
+ }
+ if (d == NULL)
+ return -ENODEV;
+
+ /* It is "synchronize_bh" to those of guys, who overslept
+ in skb_alloc/page fault etc. that device is off-line.
+ Again, it can be removed only if devices are refcounted.
+ */
+ dev_lock_wait();
+
+ if (dev_boot_phase == 0) {
#ifdef CONFIG_NET_FASTROUTE
dev_clear_fastroute(dev);
#endif
@@ -1856,27 +1836,11 @@
* Flush the multicast chain
*/
dev_mc_discard(dev);
-
- /* To avoid pointers looking to nowhere,
- we wait for end of critical section */
- dev_lock_wait();
}
- /* And unlink it from device chain. */
- write_lock_bh(&dev_base_lock);
- for (dp = &dev_base; (d=*dp) != NULL; dp=&d->next) {
- if (d == dev) {
- *dp = d->next;
- d->next = NULL;
- write_unlock_bh(&dev_base_lock);
-
- if (dev->destructor)
- dev->destructor(dev);
- return 0;
- }
- }
- write_unlock_bh(&dev_base_lock);
- return -ENODEV;
+ if (dev->destructor)
+ dev->destructor(dev);
+ return 0;
}
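unregister_netdevice is inverted: the device is unlinked from the chain first, then synchronize_bh() and dev_lock_wait() let stragglers holding stale pointers drain, and only afterwards is state torn down and the destructor run; the old code tore everything down while the device was still findable. A userspace sketch of the unlink-then-drain order follows; in this sketch the write lock already excludes all readers, so no separate grace period is needed, whereas the kernel needs one because BH code may hold device pointers taken before the unlink.

    #include <pthread.h>
    #include <stdio.h>

    struct device {
        const char *name;
        struct device *next;
        void (*destructor)(struct device *);
    };

    static struct device *dev_base;
    static pthread_rwlock_t dev_base_lock = PTHREAD_RWLOCK_INITIALIZER;

    static int unregister_dev(struct device *dev)
    {
        struct device *d, **dp;

        /* Unlink first, so no new lookup can find the device. */
        pthread_rwlock_wrlock(&dev_base_lock);
        for (dp = &dev_base; (d = *dp) != NULL; dp = &d->next) {
            if (d == dev) {
                *dp = d->next;
                break;
            }
        }
        pthread_rwlock_unlock(&dev_base_lock);
        if (d == NULL)
            return -1;          /* -ENODEV in the kernel */

        /* Dropping the write lock has already excluded every reader
         * that could have seen the device, so teardown is safe here. */
        if (dev->destructor)
            dev->destructor(dev);
        return 0;
    }

    static void say_goodbye(struct device *dev)
    {
        printf("%s destroyed\n", dev->name);
    }

    int main(void)
    {
        static struct device dummy = { "dummy", NULL, say_goodbye };
        dev_base = &dummy;
        printf("%d\n", unregister_dev(&dummy)); /* dummy destroyed, then 0 */
        return 0;
    }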
@@ -2018,16 +1982,24 @@
* If the call to dev->init fails, the dev is removed
* from the chain disconnecting the device until the
* next reboot.
+ *
+ * NB: at boot phase networking is dead, so no locking is
+ * required. But we still take dev_base_lock for sanity.
*/
dp = &dev_base;
while ((dev = *dp) != NULL) {
+ spin_lock_init(&dev->queue_lock);
+ spin_lock_init(&dev->xmit_lock);
+ dev->xmit_lock_owner = -1;
dev->iflink = -1;
if (dev->init && dev->init(dev)) {
/*
* It failed to come up. Unhook it.
*/
+ write_lock_bh(&dev_base_lock);
*dp = dev->next;
+ write_unlock_bh(&dev_base_lock);
} else {
dp = &dev->next;
dev->ifindex = dev_new_index();
@@ -2055,6 +2027,7 @@
dev_boot_phase = 0;
+ dst_init();
dev_mcast_init();
#ifdef CONFIG_IP_PNP