patch-2.1.28 linux/net/ipv4/af_inet.c
Next file: linux/net/ipv4/icmp.c
Previous file: linux/net/core/sock.c
Back to the patch index
Back to the overall index
- Lines: 1478
- Date: Mon Mar 3 09:37:44 1997
- Orig file: v2.1.27/linux/net/ipv4/af_inet.c
- Orig date: Thu Feb 27 10:57:32 1997
diff -u --recursive --new-file v2.1.27/linux/net/ipv4/af_inet.c linux/net/ipv4/af_inet.c
@@ -50,6 +50,8 @@
* Alan Cox : Loosened bind a little.
* Mike McLagan : ADD/DEL DLCI Ioctls
* Willy Konynenberg : Transparent proxying support.
+ * David S. Miller : New socket lookup architecture.
+ * Some other random speedups.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
@@ -119,10 +121,6 @@
extern int udp_get_info(char *, char **, off_t, int, int);
-struct sock * tcp_sock_array[SOCK_ARRAY_SIZE];
-struct sock * udp_sock_array[SOCK_ARRAY_SIZE];
-struct sock * raw_sock_array[SOCK_ARRAY_SIZE];
-
#ifdef CONFIG_DLCI
extern int dlci_ioctl(unsigned int, void*);
#endif
@@ -134,293 +132,94 @@
int (*rarp_ioctl_hook)(unsigned int,void*) = NULL;
/*
- * See if a socket number is in use.
+ * Destroy an AF_INET socket
*/
-static int sk_inuse(struct proto *prot, int num)
+static __inline__ void kill_sk_queues(struct sock *sk)
{
- struct sock *sk;
+ struct sk_buff *skb;
- for(sk = prot->sock_array[num & (SOCK_ARRAY_SIZE -1 )];
- sk != NULL; sk=sk->next)
- {
- if (sk->num == num)
- return(1);
+ /* First the read buffer. */
+ while((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
+ /* This will take care of closing sockets that were
+ * listening and didn't accept everything.
+ */
+ if (skb->sk != NULL && skb->sk != sk)
+ skb->sk->prot->close(skb->sk, 0);
+ kfree_skb(skb, FREE_READ);
}
- return(0);
-}
+ /* Next, the error queue. */
+ while((skb = skb_dequeue(&sk->error_queue)) != NULL)
+ kfree_skb(skb, FREE_READ);
-/*
- * Pick a new socket number
- */
+ /* Now the backlog. */
+ while((skb=skb_dequeue(&sk->back_log)) != NULL)
+ kfree_skb(skb, FREE_READ);
+}
-unsigned short get_new_socknum(struct proto *prot, unsigned short base)
+static __inline__ void kill_sk_now(struct sock *sk)
{
- static int start=0;
-
- /*
- * Used to cycle through the port numbers so the
- * chances of a confused connection drop.
- */
-
- int i, j;
- int best = 0;
- int size = 32767; /* a big num. */
- struct sock *sk;
+ /* No longer exists. */
+ del_from_prot_sklist(sk);
- if (base == 0)
- base = PROT_SOCK+1+(start & 1023);
- if (base <= PROT_SOCK)
- {
- base += PROT_SOCK+(start & 1023);
- }
-
- /*
- * Now look through the entire array and try to find an empty ptr.
- */
-
- for(i=0; i < SOCK_ARRAY_SIZE; i++)
- {
- j = 0;
- sk = prot->sock_array[(i+base+1) &(SOCK_ARRAY_SIZE -1)];
- while(sk != NULL)
- {
- sk = sk->next;
- j++;
- }
- if (j == 0)
- {
- start =(i+1+start )&1023;
- return(i+base+1);
- }
- if (j < size)
- {
- best = i;
- size = j;
- }
- }
+ /* This is gross, but needed for SOCK_PACKET -DaveM */
+ if(sk->prot->unhash)
+ sk->prot->unhash(sk);
- /* Now make sure the one we want is not in use. */
-
- while(sk_inuse(prot, base +best+1))
- {
- best += SOCK_ARRAY_SIZE;
- }
- return(best+base+1);
+ if(sk->opt)
+ kfree(sk->opt);
+ dst_release(sk->dst_cache);
+ sk_free(sk);
}
-/*
- * Add a socket into the socket tables by number.
- */
-
-void inet_put_sock(unsigned short num, struct sock *sk)
+static __inline__ void kill_sk_later(struct sock *sk)
{
- struct sock **skp, *tmp;
- int mask;
- unsigned long flags;
-
- if(sk->type==SOCK_PACKET)
- return;
-
- sk->num = num;
- sk->next = NULL;
- num = num &(SOCK_ARRAY_SIZE -1);
-
+ /* this should never happen. */
+ /* actually it can if an ack has just been sent. */
/*
- * We can't have an interrupt re-enter here.
- */
-
- save_flags(flags);
- cli();
-
- sk->prot->inuse += 1;
- if (sk->prot->highestinuse < sk->prot->inuse)
- sk->prot->highestinuse = sk->prot->inuse;
-
- if (sk->prot->sock_array[num] == NULL)
- {
- sk->prot->sock_array[num] = sk;
- restore_flags(flags);
- return;
- }
-
- restore_flags(flags);
- for(mask = 0xff000000; mask != 0xffffffff; mask = (mask >> 8) | mask)
- {
- if ((mask & sk->rcv_saddr) &&
- (mask & sk->rcv_saddr) != (mask & 0xffffffff))
- {
- mask = mask << 8;
- break;
- }
- }
-
- /*
- * add the socket to the sock_array[]..
+ * It's more normal than that...
+ * It can happen because a skb is still in the device queues
+ * [PR]
*/
- skp = sk->prot->sock_array + num;
- cli();
- while ((tmp = *skp) != NULL) {
- if (!(tmp->rcv_saddr & mask))
- break;
- skp = &tmp->next;
- }
- sk->next = tmp;
- *skp = sk;
- sti();
-}
-
-/*
- * Remove a socket from the socket tables.
- */
-
-void inet_remove_sock(struct sock *sk1)
-{
- struct sock **p;
- unsigned long flags;
-
- if (sk1->type==SOCK_PACKET)
- return;
-
- if (!sk1->prot)
- {
- NETDEBUG(printk("sock.c: remove_sock: sk1->prot == NULL\n"));
- return;
- }
+
+ printk("Socket destroy delayed (r=%d w=%d)\n",
+ sk->rmem_alloc, sk->wmem_alloc);
- /* We can't have this changing out from under us. */
- save_flags(flags);
- cli();
-
- p=&(sk1->prot->sock_array[sk1->num & (SOCK_ARRAY_SIZE -1)]);
-
- while(*p!=NULL)
- {
- if(*p==sk1)
- {
- sk1->prot->inuse--;
- *p=sk1->next;
- break;
- }
- p=&((*p)->next);
- }
- restore_flags(flags);
+ sk->destroy = 1;
+ sk->ack_backlog = 0;
+ release_sock(sk);
+ net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
}
-/*
- * Destroy an AF_INET socket
- */
-
void destroy_sock(struct sock *sk)
{
- struct sk_buff *skb;
-
lock_sock(sk); /* just to be safe. */
- /*
- * Now we can no longer get new packets or once the
- * timers are killed, send them.
+ /* Now we can no longer get new packets or once the
+ * timers are killed, send them.
*/
-
net_delete_timer(sk);
if (sk->prot->destroy)
sk->prot->destroy(sk);
- /*
- * Clean up the read buffer.
- */
-
- while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
- {
- /*
- * This will take care of closing sockets that were
- * listening and didn't accept everything.
- */
- if (skb->sk != NULL && skb->sk != sk)
- {
- IS_SKB(skb);
- skb->sk->prot->close(skb->sk, 0);
- }
- IS_SKB(skb);
- kfree_skb(skb, FREE_READ);
- }
-
- /*
- * Clean up the error queue.
- */
-
- while((skb=skb_dequeue(&sk->error_queue))!=NULL)
- {
- IS_SKB(skb);
- kfree_skb(skb, FREE_READ);
- }
-
- /*
- * Now the backlog.
- */
-
- while((skb=skb_dequeue(&sk->back_log))!=NULL)
- {
- IS_SKB(skb);
- kfree_skb(skb, FREE_READ);
- }
+ kill_sk_queues(sk);
- /*
- * Now if it has a half accepted/ closed socket.
- */
-
- if (sk->pair)
- {
+ /* Now if it has a half accepted/ closed socket. */
+ if (sk->pair) {
sk->pair->prot->close(sk->pair, 0);
sk->pair = NULL;
}
- /*
- * Now if everything is gone we can free the socket
+ /* Now if everything is gone we can free the socket
* structure, otherwise we need to keep it around until
* everything is gone.
*/
-
- if (sk->rmem_alloc == 0 && sk->wmem_alloc == 0)
- {
-/*
- * It is wrong! We MUST unlink socket from socket table
- * even earlier, than it used to be.
- * F.e. TCP socket must be unlinked at the moment, when
- * it goes to TCP_CLOSE. --ANK
- */
- inet_remove_sock(sk);
-
- if(sk->opt)
- kfree(sk->opt);
- dst_release(sk->dst_cache);
- /*
- * This one is pure paranoia. I'll take it out
- * later once I know the bug is buried.
- */
- tcp_cache_zap();
- sk_free(sk);
- }
- else
- {
- /* this should never happen. */
- /* actually it can if an ack has just been sent. */
- /*
- * It's more normal than that...
- * It can happen because a skb is still in the device queues
- * [PR]
- */
-
- printk("Socket destroy delayed (r=%d w=%d)\n",
- sk->rmem_alloc, sk->wmem_alloc);
-
- sk->destroy = 1;
- sk->ack_backlog = 0;
- release_sock(sk);
- net_reset_timer(sk, TIME_DESTROY, SOCK_DESTROY_TIME);
- }
+ if (sk->rmem_alloc == 0 && sk->wmem_alloc == 0)
+ kill_sk_now(sk);
+ else
+ kill_sk_later(sk);
}
/*
@@ -467,15 +266,13 @@
static int inet_autobind(struct sock *sk)
{
/* We may need to bind the socket. */
- if (sk->num == 0)
- {
- sk->num = get_new_socknum(sk->prot, 0);
+ if (sk->num == 0) {
+ sk->num = sk->prot->good_socknum();
if (sk->num == 0)
return(-EAGAIN);
- udp_cache_zap();
- tcp_cache_zap();
- inet_put_sock(sk->num, sk);
- sk->dummy_th.source = ntohs(sk->num);
+ sk->dummy_th.source = htons(sk->num);
+ sk->prot->hash(sk);
+ add_to_prot_sklist(sk);
}
return 0;
}
@@ -491,7 +288,7 @@
if (sock->state != SS_UNCONNECTED || sock->type != SOCK_STREAM)
return(-EINVAL);
- if (inet_autobind(sk)!=0)
+ if (inet_autobind(sk) != 0)
return -EAGAIN;
/* We might as well re use these. */
@@ -506,10 +303,11 @@
if ((unsigned) backlog > SOMAXCONN)
backlog = SOMAXCONN;
sk->max_ack_backlog = backlog;
- if (sk->state != TCP_LISTEN)
- {
+ if (sk->state != TCP_LISTEN) {
sk->ack_backlog = 0;
sk->state = TCP_LISTEN;
+ sk->prot->rehash(sk);
+ add_to_prot_sklist(sk);
}
sk->socket->flags |= SO_ACCEPTCON;
return(0);
@@ -526,87 +324,44 @@
{
struct sock *sk;
struct proto *prot;
- int err;
sock->state = SS_UNCONNECTED;
sk = sk_alloc(GFP_KERNEL);
if (sk == NULL)
- return(-ENOBUFS);
+ goto do_oom;
- /*
- * Note for tcp that also wiped the dummy_th block for us.
- */
- switch (sock->type)
- {
- case SOCK_STREAM:
- case SOCK_SEQPACKET:
- if (protocol && protocol != IPPROTO_TCP)
- {
- sk_free(sk);
- return(-EPROTONOSUPPORT);
- }
- protocol = IPPROTO_TCP;
- sk->no_check = TCP_NO_CHECK;
- if (ipv4_config.no_pmtu_disc)
- sk->ip_pmtudisc = IP_PMTUDISC_DONT;
- else
- sk->ip_pmtudisc = IP_PMTUDISC_WANT;
- prot = &tcp_prot;
- sock->ops = &inet_stream_ops;
- break;
-
- case SOCK_DGRAM:
- if (protocol && protocol != IPPROTO_UDP)
- {
- sk_free(sk);
- return(-EPROTONOSUPPORT);
- }
- protocol = IPPROTO_UDP;
- sk->no_check = UDP_NO_CHECK;
+ /* Note for tcp that also wiped the dummy_th block for us. */
+ if(sock->type == SOCK_STREAM || sock->type == SOCK_SEQPACKET) {
+ if (protocol && protocol != IPPROTO_TCP)
+ goto free_and_noproto;
+ protocol = IPPROTO_TCP;
+ sk->no_check = TCP_NO_CHECK;
+ if (ipv4_config.no_pmtu_disc)
sk->ip_pmtudisc = IP_PMTUDISC_DONT;
- prot=&udp_prot;
- sock->ops = &inet_dgram_ops;
- break;
-
- case SOCK_RAW:
- if (!suser())
- {
- sk_free(sk);
- return(-EPERM);
- }
- if (!protocol)
- {
- sk_free(sk);
- return(-EPROTONOSUPPORT);
- }
- prot = &raw_prot;
- sk->reuse = 1;
- sk->ip_pmtudisc = IP_PMTUDISC_DONT;
- sk->num = protocol;
- sock->ops = &inet_dgram_ops;
- break;
-
- case SOCK_PACKET:
- if (!suser())
- {
- sk_free(sk);
- return(-EPERM);
- }
- if (!protocol)
- {
- sk_free(sk);
- return(-EPROTONOSUPPORT);
- }
- prot = &packet_prot;
- sk->reuse = 1;
- sk->ip_pmtudisc = IP_PMTUDISC_DONT;
- sk->num = protocol;
- sock->ops = &inet_dgram_ops;
- break;
-
- default:
- sk_free(sk);
- return(-ESOCKTNOSUPPORT);
+ else
+ sk->ip_pmtudisc = IP_PMTUDISC_WANT;
+ prot = &tcp_prot;
+ sock->ops = &inet_stream_ops;
+ } else if(sock->type == SOCK_DGRAM) {
+ if (protocol && protocol != IPPROTO_UDP)
+ goto free_and_noproto;
+ protocol = IPPROTO_UDP;
+ sk->no_check = UDP_NO_CHECK;
+ sk->ip_pmtudisc = IP_PMTUDISC_DONT;
+ prot=&udp_prot;
+ sock->ops = &inet_dgram_ops;
+ } else if(sock->type == SOCK_RAW || sock->type == SOCK_PACKET) {
+ if (!suser())
+ goto free_and_badperm;
+ if (!protocol)
+ goto free_and_noproto;
+ prot = (sock->type == SOCK_RAW) ? &raw_prot : &packet_prot;
+ sk->reuse = 1;
+ sk->ip_pmtudisc = IP_PMTUDISC_DONT;
+ sk->num = protocol;
+ sock->ops = &inet_dgram_ops;
+ } else {
+ goto free_and_badtype;
}
sock_init_data(sock,sk);
@@ -636,33 +391,47 @@
sk->ip_mc_index=0;
sk->ip_mc_list=NULL;
- /*
- * Speed up by setting some standard state for the dummy_th
+ /* Speed up by setting some standard state for the dummy_th
* if TCP uses it (maybe move to tcp_init later)
*/
- if (sk->num)
- {
- /*
- * It assumes that any protocol which allows
+ if (sk->num) {
+ /* It assumes that any protocol which allows
* the user to assign a number at socket
* creation time automatically
* shares.
*/
- inet_put_sock(sk->num, sk);
sk->dummy_th.source = ntohs(sk->num);
+
+ /* This is gross, but needed for SOCK_PACKET -DaveM */
+ if(sk->prot->hash)
+ sk->prot->hash(sk);
+ add_to_prot_sklist(sk);
}
- if (sk->prot->init)
- {
- err = sk->prot->init(sk);
- if (err != 0)
- {
+ if (sk->prot->init) {
+ int err = sk->prot->init(sk);
+ if (err != 0) {
destroy_sock(sk);
return(err);
}
}
return(0);
+
+free_and_badtype:
+ sk_free(sk);
+ return -ESOCKTNOSUPPORT;
+
+free_and_badperm:
+ sk_free(sk);
+ return -EPERM;
+
+free_and_noproto:
+ sk_free(sk);
+ return -EPROTONOSUPPORT;
+
+do_oom:
+ return -ENOBUFS;
}
@@ -684,193 +453,99 @@
int inet_release(struct socket *sock, struct socket *peersock)
{
struct sock *sk = sock->sk;
- unsigned long timeout;
-
- if (sk==NULL)
- return 0;
-
- if (sock->state != SS_UNCONNECTED)
- sock->state = SS_DISCONNECTING;
-
- sk->state_change(sk);
-
- /* Start closing the connection. This may take a while. */
- /* Applications forget to leave groups before exiting */
- ip_mc_drop_socket(sk);
+ if (sk) {
+ unsigned long timeout;
- /*
- * If linger is set, we don't return until the close
- * is complete. Otherwise we return immediately. The
- * actually closing is done the same either way.
- *
- * If the close is due to the process exiting, we never
- * linger..
- */
- timeout = 0;
- if (sk->linger)
- {
- timeout = ~0UL;
- if (!sk->lingertime)
- timeout = jiffies + HZ*sk->lingertime;
- }
- if (current->flags & PF_EXITING)
+ /* Begin closedown and wake up sleepers. */
+ if (sock->state != SS_UNCONNECTED)
+ sock->state = SS_DISCONNECTING;
+ sk->state_change(sk);
+
+ /* Applications forget to leave groups before exiting */
+ ip_mc_drop_socket(sk);
+
+ /* If linger is set, we don't return until the close
+ * is complete. Otherwise we return immediately. The
+ * actually closing is done the same either way.
+ *
+ * If the close is due to the process exiting, we never
+ * linger..
+ */
timeout = 0;
+ if (sk->linger && !(current->flags & PF_EXITING)) {
+ timeout = ~0UL;
- sock->sk = NULL;
- sk->socket = NULL;
-
- sk->prot->close(sk, timeout);
+ /* XXX This makes no sense whatsoever... -DaveM */
+ if (!sk->lingertime)
+ timeout = jiffies + HZ*sk->lingertime;
+ }
+ sock->sk = NULL;
+ sk->socket = NULL;
+ sk->prot->close(sk, timeout);
+ }
return(0);
}
-
-static int inet_bind(struct socket *sock, struct sockaddr *uaddr,
- int addr_len)
+static int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
{
struct sockaddr_in *addr=(struct sockaddr_in *)uaddr;
- struct sock *sk=sock->sk, *sk2;
- unsigned short snum = 0 /* Stoopid compiler.. this IS ok */;
+ struct sock *sk=sock->sk;
+ unsigned short snum;
int chk_addr_ret;
- /*
- * If the socket has its own bind function then use it.
- */
-
+ /* If the socket has its own bind function then use it. (RAW and PACKET) */
if(sk->prot->bind)
- return sk->prot->bind(sk,uaddr, addr_len);
+ return sk->prot->bind(sk, uaddr, addr_len);
- /* check this error. */
- if (sk->state != TCP_CLOSE)
- return(-EINVAL);
- if(addr_len<sizeof(struct sockaddr_in))
+ /* Check these errors (active socket, bad address length, double bind). */
+ if ((sk->state != TCP_CLOSE) ||
+ (addr_len < sizeof(struct sockaddr_in)) ||
+ (sk->num != 0))
return -EINVAL;
- if (sock->type != SOCK_RAW)
- {
- if (sk->num != 0)
- return(-EINVAL);
-
- snum = ntohs(addr->sin_port);
-
+ snum = ntohs(addr->sin_port);
#ifdef CONFIG_IP_MASQUERADE
- /*
- * The kernel masquerader needs some ports
- */
- if(snum>=PORT_MASQ_BEGIN && snum<=PORT_MASQ_END)
- return -EADDRINUSE;
+ /* The kernel masquerader needs some ports. */
+ if((snum >= PORT_MASQ_BEGIN) && (snum <= PORT_MASQ_END))
+ return -EADDRINUSE;
#endif
-
- if (snum == 0)
- snum = get_new_socknum(sk->prot, 0);
- if (snum < PROT_SOCK && !suser())
- return(-EACCES);
- }
+ if (snum == 0)
+ snum = sk->prot->good_socknum();
+ if (snum < PROT_SOCK && !suser())
+ return(-EACCES);
chk_addr_ret = __ip_chk_addr(addr->sin_addr.s_addr);
+ if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR &&
+ chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST) {
#ifdef CONFIG_IP_TRANSPARENT_PROXY
- /*
- * Superuser may bind to any address to allow transparent proxying.
- */
- if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST && !suser())
-#else
- if (addr->sin_addr.s_addr != 0 && chk_addr_ret != IS_MYADDR && chk_addr_ret != IS_MULTICAST && chk_addr_ret != IS_BROADCAST)
-#endif
- return(-EADDRNOTAVAIL); /* Source address MUST be ours! */
-
-#ifndef CONFIG_IP_TRANSPARENT_PROXY
- /*
- * Am I just thick or is this test really always true after the one
- * above? Just taking the test out appears to be the easiest way to
- * make binds to remote addresses for transparent proxying work.
- */
- if (chk_addr_ret || addr->sin_addr.s_addr == 0)
- {
+ /* Superuser may bind to any address to allow transparent proxying. */
+ if(!suser())
#endif
- /*
- * We keep a pair of addresses. rcv_saddr is the one
- * used by get_sock_*(), and saddr is used for transmit.
- *
- * In the BSD API these are the same except where it
- * would be illegal to use them (multicast/broadcast) in
- * which case the sending device address is used.
- */
- sk->rcv_saddr = addr->sin_addr.s_addr;
- if(chk_addr_ret==IS_MULTICAST||chk_addr_ret==IS_BROADCAST)
- sk->saddr = 0; /* Use device */
- else
- sk->saddr = addr->sin_addr.s_addr;
-#ifndef CONFIG_IP_TRANSPARENT_PROXY
+ return -EADDRNOTAVAIL; /* Source address MUST be ours! */
}
-#endif
- if (sock->type != SOCK_RAW)
- {
- /* Make sure we are allowed to bind here. */
- cli();
- for(sk2 = sk->prot->sock_array[snum & (SOCK_ARRAY_SIZE -1)];
- sk2 != NULL; sk2 = sk2->next)
- {
- /*
- * Hash collision or real match ?
- */
-
- if (sk2->num != snum)
- continue;
-
- /*
- * Either bind on the port is wildcard means
- * they will overlap and thus be in error
- */
-
- if (!sk2->rcv_saddr || !sk->rcv_saddr)
- {
- /*
- * Allow only if both are setting reuse.
- */
- if(sk2->reuse && sk->reuse && sk2->state!=TCP_LISTEN)
- continue;
- sti();
- return(-EADDRINUSE);
- }
-
- /*
- * Two binds match ?
- */
-
- if (sk2->rcv_saddr != sk->rcv_saddr)
- continue;
- /*
- * Reusable port ?
- */
-
- if (!sk->reuse)
- {
- sti();
- return(-EADDRINUSE);
- }
-
- /*
- * Reuse ?
- */
-
- if (!sk2->reuse || sk2->state==TCP_LISTEN)
- {
- sti();
- return(-EADDRINUSE);
- }
- }
- sti();
- inet_remove_sock(sk);
- if (sock->type==SOCK_DGRAM)
- udp_cache_zap();
- if (sock->type==SOCK_STREAM)
- tcp_cache_zap();
- inet_put_sock(snum, sk);
- sk->dummy_th.source = ntohs(sk->num);
- sk->daddr = 0;
- sk->dummy_th.dest = 0;
- }
+ /* We keep a pair of addresses. rcv_saddr is the one
+ * used by hash lookups, and saddr is used for transmit.
+ *
+ * In the BSD API these are the same except where it
+ * would be illegal to use them (multicast/broadcast) in
+ * which case the sending device address is used.
+ */
+ sk->rcv_saddr = sk->saddr = addr->sin_addr.s_addr;
+ if(chk_addr_ret == IS_MULTICAST || chk_addr_ret == IS_BROADCAST)
+ sk->saddr = 0; /* Use device */
+
+ /* Make sure we are allowed to bind here. */
+ if(sk->prot->verify_bind(sk, snum))
+ return -EADDRINUSE;
+
+ sk->num = snum;
+ sk->dummy_th.source = ntohs(snum);
+ sk->daddr = 0;
+ sk->dummy_th.dest = 0;
+ sk->prot->rehash(sk);
+ add_to_prot_sklist(sk);
dst_release(sk->dst_cache);
sk->dst_cache=NULL;
return(0);
@@ -882,7 +557,7 @@
struct sock *sk=sock->sk;
int err;
- if (inet_autobind(sk)!=0)
+ if (inet_autobind(sk) != 0)
return(-EAGAIN);
if (sk->prot->connect == NULL)
return(-EOPNOTSUPP);
@@ -903,85 +578,61 @@
struct sock *sk=sock->sk;
int err;
- switch (sock->state)
- {
- case SS_UNCONNECTED:
- /* This is ok... continue with connect */
- break;
- case SS_CONNECTED:
- /* Socket is already connected */
+ if(sock->state != SS_UNCONNECTED && sock->state != SS_CONNECTING) {
+ if(sock->state == SS_CONNECTED)
return -EISCONN;
- case SS_CONNECTING:
- /* Not yet connected... we will check this. */
-
- /*
- * FIXME: for all protocols what happens if you start
- * an async connect fork and both children connect. Clean
- * this up in the protocols!
- */
- break;
- default:
- return(-EINVAL);
- }
-
- if (sock->state == SS_CONNECTING && tcp_connected(sk->state))
- {
- sock->state = SS_CONNECTED;
- /* Connection completing after a connect/EINPROGRESS/select/connect */
- return 0; /* Rock and roll */
+ return -EINVAL;
}
- if (sock->state == SS_CONNECTING && sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK))
- {
- if(sk->err!=0)
- return sock_error(sk);
- return -EALREADY; /* Connecting is currently in progress */
- }
- if (sock->state != SS_CONNECTING)
- {
+ if(sock->state == SS_CONNECTING) {
+ if(tcp_connected(sk->state)) {
+ sock->state = SS_CONNECTED;
+ return 0;
+ }
+ if(sk->protocol == IPPROTO_TCP && (flags & O_NONBLOCK)) {
+ if(sk->err)
+ return sock_error(sk);
+ return -EALREADY;
+ }
+ } else {
/* We may need to bind the socket. */
- if (inet_autobind(sk)!=0)
+ if (inet_autobind(sk) != 0)
return(-EAGAIN);
if (sk->prot->connect == NULL)
return(-EOPNOTSUPP);
err = sk->prot->connect(sk, uaddr, addr_len);
- if (err < 0)
+ if (err < 0)
return(err);
sock->state = SS_CONNECTING;
}
- if (sk->state > TCP_FIN_WAIT2 && sock->state==SS_CONNECTING)
- {
- sock->state=SS_UNCONNECTED;
+ if (sk->state > TCP_FIN_WAIT2 && sock->state == SS_CONNECTING) {
+ sock->state = SS_UNCONNECTED;
return sock_error(sk);
}
if (sk->state != TCP_ESTABLISHED && (flags & O_NONBLOCK))
return (-EINPROGRESS);
- cli(); /* avoid the race condition */
- while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
- {
+ cli();
+ while(sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV) {
interruptible_sleep_on(sk->sleep);
- if (current->signal & ~current->blocked)
- {
+ if (current->signal & ~current->blocked) {
sti();
return(-ERESTARTSYS);
}
/* This fixes a nasty in the tcp/ip code. There is a hideous hassle with
icmp error packets wanting to close a tcp or udp socket. */
- if (sk->err && sk->protocol == IPPROTO_TCP)
- {
+ if (sk->err && sk->protocol == IPPROTO_TCP) {
sock->state = SS_UNCONNECTED;
sti();
return sock_error(sk); /* set by tcp_err() */
}
}
sti();
- sock->state = SS_CONNECTED;
- if (sk->state != TCP_ESTABLISHED && sk->err)
- {
+ sock->state = SS_CONNECTED;
+ if ((sk->state != TCP_ESTABLISHED) && sk->err) {
sock->state = SS_UNCONNECTED;
return sock_error(sk);
}
@@ -994,32 +645,24 @@
int inet_accept(struct socket *sock, struct socket *newsock, int flags)
{
- struct sock *sk1 = sock->sk;
+ struct sock *sk1 = sock->sk, *sk2;
struct sock *newsk = newsock->sk;
- struct sock *sk2;
- int err;
+ int err = -EINVAL;
- if (sock->state != SS_UNCONNECTED)
- return -EINVAL;
- if (!(sock->flags & SO_ACCEPTCON))
- return -EINVAL;
- if (sk1->prot->accept == NULL)
- return -EOPNOTSUPP;
+ if (sock->state != SS_UNCONNECTED || !(sock->flags & SO_ACCEPTCON))
+ goto do_err;
- /*
- * Restore the state if we have been interrupted, and then returned.
- */
-
- if (sk1->pair != NULL )
- {
+ err = -EOPNOTSUPP;
+ if (sk1->prot->accept == NULL)
+ goto do_err;
+
+ /* Restore the state if we have been interrupted, and then returned. */
+ if (sk1->pair != NULL) {
sk2 = sk1->pair;
sk1->pair = NULL;
- }
- else
- {
- sk2 = sk1->prot->accept(sk1,flags);
- if (sk2 == NULL)
- return sock_error(sk1);
+ } else {
+ if((sk2 = sk1->prot->accept(sk1,flags)) == NULL)
+ goto do_sk1_err;
}
/*
@@ -1027,7 +670,6 @@
* We need to free it up because the tcp module creates
* its own when it accepts one.
*/
-
sk2->sleep = newsk->sleep;
newsock->sk = sk2;
@@ -1035,56 +677,54 @@
newsk->socket = NULL;
if (flags & O_NONBLOCK)
- {
- destroy_sock(newsk);
- return(0);
- }
+ goto do_half_success;
- cli(); /* avoid the race. */
- while (sk2->state == TCP_SYN_RECV)
- {
+ cli();
+ while (sk2->state == TCP_SYN_RECV) {
interruptible_sleep_on(sk2->sleep);
- if (current->signal & ~current->blocked)
- {
- sti();
- sk1->pair = sk2;
- sk2->sleep = NULL;
- sk2->socket = NULL;
-
- newsock->sk = newsk;
- newsk->socket = newsock;
- return -ERESTARTSYS;
- }
+ if (current->signal & ~current->blocked)
+ goto do_interrupted;
}
sti();
-
- if (sk2->state != TCP_ESTABLISHED && sk2->err > 0)
- {
- err = sock_error(sk2);
- sk2->sleep = NULL;
- sk2->socket = NULL;
- destroy_sock(sk2);
-
- newsock->sk = newsk;
- newsk->socket = newsock;
-
- return err;
- }
+ if(sk2->state == TCP_ESTABLISHED)
+ goto do_full_success;
+ if(sk2->err > 0)
+ goto do_connect_err;
+ err = -ECONNABORTED;
if (sk2->state == TCP_CLOSE)
- {
- sk2->sleep = NULL;
- sk2->socket = NULL;
- destroy_sock(sk2);
-
- newsock->sk = newsk;
- newsk->socket = newsock;
-
- return -ECONNABORTED;
- }
-
+ goto do_bad_connection;
+do_full_success:
destroy_sock(newsk);
newsock->state = SS_CONNECTED;
+ return 0;
+
+do_half_success:
+ destroy_sock(newsk);
return(0);
+
+do_connect_err:
+ err = sock_error(sk2);
+do_bad_connection:
+ sk2->sleep = NULL;
+ sk2->socket = NULL;
+ destroy_sock(sk2);
+ newsock->sk = newsk;
+ newsk->socket = newsock;
+ return err;
+
+do_interrupted:
+ sti();
+ sk1->pair = sk2;
+ sk2->sleep = NULL;
+ sk2->socket = NULL;
+ newsock->sk = newsk;
+ newsk->socket = newsock;
+ err = -ERESTARTSYS;
+do_err:
+ return err;
+do_sk1_err:
+ err = sock_error(sk1);
+ return err;
}
@@ -1095,19 +735,16 @@
static int inet_getname(struct socket *sock, struct sockaddr *uaddr,
int *uaddr_len, int peer)
{
- struct sock *sk=sock->sk;
- struct sockaddr_in *sin=(struct sockaddr_in *)uaddr;
+ struct sock *sk = sock->sk;
+ struct sockaddr_in *sin = (struct sockaddr_in *)uaddr;
sin->sin_family = AF_INET;
- if (peer)
- {
+ if (peer) {
if (!tcp_connected(sk->state))
return(-ENOTCONN);
sin->sin_port = sk->dummy_th.dest;
sin->sin_addr.s_addr = sk->daddr;
- }
- else
- {
+ } else {
__u32 addr = sk->rcv_saddr;
if (!addr)
addr = sk->saddr;
@@ -1134,7 +771,7 @@
if (sk->err)
return sock_error(sk);
/* We may need to bind the socket. */
- if (inet_autobind(sk)!=0)
+ if (inet_autobind(sk) != 0)
return(-EAGAIN);
err = sk->prot->recvmsg(sk, msg, size, flags&MSG_DONTWAIT,
flags&~MSG_DONTWAIT, &addr_len);
@@ -1157,9 +794,11 @@
return(-EOPNOTSUPP);
if(sk->err)
return sock_error(sk);
+
/* We may need to bind the socket. */
- if(inet_autobind(sk)!=0)
+ if(inet_autobind(sk) != 0)
return -EAGAIN;
+
return sk->prot->sendmsg(sk, msg, size);
}
@@ -1168,8 +807,7 @@
{
struct sock *sk = sock->sk;
- /*
- * This should really check to make sure
+ /* This should really check to make sure
* the socket is a TCP socket. (WHY AC...)
*/
how++; /* maps 0->1 has the advantage of making bit 1 rcvs and
@@ -1329,256 +967,6 @@
return(0);
}
-#ifdef CONFIG_IP_TRANSPARENT_PROXY
-/*
- * Some routines for the for loop in get_sock which sometimes needs to walk
- * two linked lists in sequence. Could use macros as well.
- * Does anyone know a nicer way to code this?
- */
-static __inline__ struct sock *secondlist(unsigned short hpnum, struct sock *s,
- int *pfirstpass, struct proto *prot)
-{
- if (s == NULL && (*pfirstpass)-- )
- return prot->sock_array[hpnum & (SOCK_ARRAY_SIZE - 1)];
- else
- return s;
-}
-static __inline__ struct sock *get_sock_loop_init(unsigned short hnum,
- unsigned short hpnum, struct sock *s,
- int *pfirstpass, struct proto *prot)
-{
- s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)];
- return secondlist(hpnum, s, pfirstpass, prot);
-}
-static __inline__ struct sock *get_sock_loop_next(unsigned short hnum,
- unsigned short hpnum, struct sock *s,
- int *pfirstpass, struct proto *prot)
-{
- s = s->next;
- return secondlist(hpnum, s, pfirstpass, prot);
-}
-
-struct sock *get_sock_proxy(struct proto *prot, unsigned short num,
- unsigned long raddr,
- unsigned short rnum, unsigned long laddr,
- unsigned long paddr, unsigned short pnum)
-{
- struct sock *s = 0;
- struct sock *result = NULL;
- int badness = -1;
- unsigned short hnum;
- unsigned short hpnum;
- int firstpass = 1;
-
- hnum = ntohs(num);
- hpnum = ntohs(pnum);
-
- /*
- * SOCK_ARRAY_SIZE must be a power of two. This will work better
- * than a prime unless 3 or more sockets end up using the same
- * array entry. This should not be a problem because most
- * well known sockets don't overlap that much, and for
- * the other ones, we can just be careful about picking our
- * socket number when we choose an arbitrary one.
- */
-
- for(s = get_sock_loop_init(hnum, hpnum, s, &firstpass, prot);
- s != NULL;
- s = get_sock_loop_next(hnum, hpnum, s, &firstpass, prot))
- {
- int score = 0;
-
- /* accept the addressed port or the redirect (proxy) port */
- if (s->num != hnum && s->num != hpnum)
- continue;
-
- if(s->dead && (s->state == TCP_CLOSE))
- continue;
- /* local address matches? */
- if (s->rcv_saddr) {
- /*
- * If this is redirected traffic, it must either
- * match on the redirected port/ip-address or on
- * the actual destination, not on a mixture.
- * There must be a simpler way to express this...
- */
- if ((s->num != hpnum || s->rcv_saddr != paddr)
- && (s->num != hnum || s->rcv_saddr != laddr))
- continue;
- score++;
- }
- /* remote address matches? */
- if (s->daddr) {
- if (s->daddr != raddr)
- continue;
- score++;
- }
- /* remote port matches? */
- if (s->dummy_th.dest) {
- if (s->dummy_th.dest != rnum)
- continue;
- score++;
- }
- /* perfect match? */
- if (score == 3 && s->num == hnum)
- return s;
- /* no, check if this is the best so far.. */
- if (score <= badness)
- continue;
- /* don't accept near matches on the actual destination
- * port with IN_ADDR_ANY for redirected traffic, but do
- * allow explicit remote address listens. (disputable)
- */
- if (s->num != hpnum && !s->rcv_saddr)
- continue;
- result = s;
- badness = score;
- }
- return result;
-}
-#endif
-
-/*
- * This routine must find a socket given a TCP or UDP header.
- * Everything is assumed to be in net order.
- *
- * We give priority to more closely bound ports: if some socket
- * is bound to a particular foreign address, it will get the packet
- * rather than somebody listening to any address..
- */
-
-struct sock *get_sock(struct proto *prot, unsigned short num,
- unsigned long raddr,
- unsigned short rnum, unsigned long laddr)
-{
- struct sock *s = 0;
- struct sock *result = NULL;
- int badness = -1;
- unsigned short hnum;
-
- hnum = ntohs(num);
-
- /*
- * SOCK_ARRAY_SIZE must be a power of two. This will work better
- * than a prime unless 3 or more sockets end up using the same
- * array entry. This should not be a problem because most
- * well known sockets don't overlap that much, and for
- * the other ones, we can just be careful about picking our
- * socket number when we choose an arbitrary one.
- */
-
- for(s = prot->sock_array[hnum & (SOCK_ARRAY_SIZE - 1)];
- s != NULL; s = s->next)
- {
- int score = 0;
-
- if (s->num != hnum)
- continue;
-
- if(s->dead && (s->state == TCP_CLOSE))
- continue;
- /* local address matches? */
- if (s->rcv_saddr) {
- if (s->rcv_saddr != laddr)
- continue;
- score++;
- }
- /* remote address matches? */
- if (s->daddr) {
- if (s->daddr != raddr)
- continue;
- score++;
- }
- /* remote port matches? */
- if (s->dummy_th.dest) {
- if (s->dummy_th.dest != rnum)
- continue;
- score++;
- }
- /* perfect match? */
- if (score == 3)
- return s;
- /* no, check if this is the best so far.. */
- if (score <= badness)
- continue;
- result = s;
- badness = score;
- }
- return result;
-}
-
-
-/*
- * Deliver a datagram to raw sockets.
- */
-
-struct sock *get_sock_raw(struct sock *sk,
- unsigned short num,
- unsigned long raddr,
- unsigned long laddr)
-{
- struct sock *s;
-
- s=sk;
-
- for(; s != NULL; s = s->next)
- {
- if (s->num != num)
- continue;
- if(s->dead && (s->state == TCP_CLOSE))
- continue;
- if(s->daddr && s->daddr!=raddr)
- continue;
- if(s->rcv_saddr && s->rcv_saddr != laddr)
- continue;
- return(s);
- }
- return(NULL);
-}
-
-/*
- * Deliver a datagram to broadcast/multicast sockets.
- */
-
-struct sock *get_sock_mcast(struct sock *sk,
- unsigned short num,
- unsigned long raddr,
- unsigned short rnum, unsigned long laddr)
-{
- struct sock *s;
- unsigned short hnum;
-
- hnum = ntohs(num);
-
- /*
- * SOCK_ARRAY_SIZE must be a power of two. This will work better
- * than a prime unless 3 or more sockets end up using the same
- * array entry. This should not be a problem because most
- * well known sockets don't overlap that much, and for
- * the other ones, we can just be careful about picking our
- * socket number when we choose an arbitrary one.
- */
-
- s=sk;
-
- for(; s != NULL; s = s->next)
- {
- if (s->num != hnum)
- continue;
- if(s->dead && (s->state == TCP_CLOSE))
- continue;
- if(s->daddr && s->daddr!=raddr)
- continue;
- if (s->dummy_th.dest != rnum && s->dummy_th.dest != 0)
- continue;
- if(s->rcv_saddr && s->rcv_saddr != laddr)
- continue;
- return(s);
- }
- return(NULL);
-}
-
-
struct proto_ops inet_stream_ops = {
AF_INET,
@@ -1621,9 +1009,6 @@
inet_recvmsg
};
-
-
-
struct net_proto_family inet_family_ops = {
AF_INET,
inet_create
@@ -1681,8 +1066,6 @@
{
struct sk_buff *dummy_skb;
struct inet_protocol *p;
- int i;
-
printk("Swansea University Computer Society TCP/IP for NET3.037\n");
@@ -1703,22 +1086,6 @@
/*
* Add all the protocols.
*/
-
- for(i = 0; i < SOCK_ARRAY_SIZE; i++)
- {
- tcp_sock_array[i] = NULL;
- udp_sock_array[i] = NULL;
- raw_sock_array[i] = NULL;
- }
- tcp_prot.inuse = 0;
- tcp_prot.highestinuse = 0;
- tcp_prot.sock_array = tcp_sock_array;
- udp_prot.inuse = 0;
- udp_prot.highestinuse = 0;
- udp_prot.sock_array = udp_sock_array;
- raw_prot.inuse = 0;
- raw_prot.highestinuse = 0;
- raw_prot.sock_array = raw_sock_array;
printk("IP Protocols: ");
for(p = inet_protocol_base; p != NULL;)
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen, slshen@lbl.gov