root/net/inet/tcp.c


DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_close_pending
  6. tcp_dequeue_established
  7. tcp_time_wait
  8. tcp_do_retransmit
  9. reset_xmit_timer
  10. tcp_retransmit_time
  11. tcp_retransmit
  12. retransmit_timer
  13. tcp_err
  14. tcp_readable
  15. do_tcp_select
  16. tcp_select
  17. tcp_ioctl
  18. tcp_check
  19. tcp_send_check
  20. tcp_send_skb
  21. tcp_dequeue_partial
  22. tcp_send_partial
  23. tcp_enqueue_partial
  24. tcp_send_ack
  25. tcp_build_header
  26. tcp_write
  27. tcp_sendto
  28. tcp_read_wakeup
  29. cleanup_rbuf
  30. tcp_read_urg
  31. tcp_read
  32. tcp_shutdown
  33. tcp_recvfrom
  34. tcp_reset
  35. tcp_options
  36. default_mask
  37. tcp_init_seq
  38. tcp_conn_request
  39. tcp_close
  40. tcp_write_xmit
  41. tcp_ack
  42. tcp_fin
  43. tcp_data
  44. tcp_check_urg
  45. tcp_urg
  46. tcp_accept
  47. tcp_connect
  48. tcp_sequence
  49. tcp_std_reset
  50. tcp_rcv
  51. tcp_write_wakeup
  52. tcp_send_probe0
  53. tcp_setsockopt
  54. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
  27  *                                      pointers passed where wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error.
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
  83  *              Matt Dillon     :       Yet more small nasties removed from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *              Alan Cox        :       Reset on closedown bug.
  87  *      Peter De Schrijver      :       ENOTCONN check missing in tcp_sendto().
  88  *              Michael Pall    :       Handle select() after URG properly in all cases.
  89  *              Michael Pall    :       Undo the last fix in tcp_read_urg() (multi URG PUSH broke rlogin).
  90  *              Michael Pall    :       Fix the multi URG PUSH problem in tcp_readable(), select() after URG works now.
  91  *              Michael Pall    :       recv(...,MSG_OOB) never blocks in the BSD api.
  92  *              Alan Cox        :       Changed the semantics of sk->socket to 
  93  *                                      fix a race and a signal problem with
  94  *                                      accept() and async I/O.
  95  *              Alan Cox        :       Relaxed the rules on tcp_sendto().
  96  *              Yury Shevchuk   :       Really fixed accept() blocking problem.
  97  *              Craig I. Hagan  :       Allow for BSD compatible TIME_WAIT for
  98  *                                      clients/servers which listen in on
  99  *                                      fixed ports.
 100  *              Alan Cox        :       Cleaned the above up and shrank it to
 101  *                                      a sensible code size.
 102  *              Alan Cox        :       Self connect lockup fix.
 103  *              Alan Cox        :       No connect to multicast.
 104  *              Ross Biro       :       Close unaccepted children on master
 105  *                                      socket close.
 106  *              Alan Cox        :       Reset tracing code.
 107  *              Alan Cox        :       Spurious resets on shutdown.
 108  *              Alan Cox        :       Giant 15 minute/60 second timer error
 109  *              Alan Cox        :       Small whoops in selecting before an accept.
  110  *              Alan Cox        :       Kept the state trace facility since it's
 111  *                                      handy for debugging.
 112  *              Alan Cox        :       More reset handler fixes.
 113  *              Alan Cox        :       Started rewriting the code based on the RFC's
 114  *                                      for other useful protocol references see:  
 115  *                                      Comer, KA9Q NOS, and for a reference on the
 116  *                                      difference between specifications and how BSD
 117  *                                      works see the 4.4lite source.
 118  *              A.N.Kuznetsov   :       Don't time wait on completion of tidy 
 119  *                                      close.
 120  *              Linus Torvalds  :       Fin/Shutdown & copied_seq changes.
 121  *              Linus Torvalds  :       Fixed BSD port reuse to work first syn
 122  *              Alan Cox        :       Reimplemented timers as per the RFC and using multiple
 123  *                                      timers for sanity. 
 124  *
 125  *
 126  * To Fix:
 127  *              Fast path the code. Two things here - fix the window calculation
 128  *              so it doesn't iterate over the queue, also spot packets with no funny
 129  *              options arriving in order and process directly.
 130  *
 131  *              Implement RFC 1191 [Path MTU discovery]
 132  *              Look at the effect of implementing RFC 1337 suggestions and their impact.
 133  *              Rewrite output state machine to use a single queue and do low window
 134  *              situations as per the spec (RFC 1122)
 135  *              Speed up input assembly algorithm.
 136  *              RFC1323 - PAWS and window scaling. PAWS is required for IPv6 so we
 137  *              could do with it working on IPv4
 138  *              User settable/learned rtt/max window/mtu
 139  *              Cope with MTU/device switches when retransmitting in tcp.
 140  *
 141  *
 142  *
 143  *              This program is free software; you can redistribute it and/or
 144  *              modify it under the terms of the GNU General Public License
 145  *              as published by the Free Software Foundation; either version
 146  *              2 of the License, or(at your option) any later version.
 147  *
 148  * Description of States:
 149  *
 150  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 151  *
 152  *      TCP_SYN_RECV            received a connection request, sent ack,
 153  *                              waiting for final ack in three-way handshake.
 154  *
 155  *      TCP_ESTABLISHED         connection established
 156  *
 157  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 158  *                              transmission of remaining buffered data
 159  *
 160  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 161  *                              to shutdown
 162  *
 163  *      TCP_CLOSING             both sides have shutdown but we still have
 164  *                              data we have to finish sending
 165  *
 166  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 167  *                              closed, can only be entered from FIN_WAIT2
 168  *                              or CLOSING.  Required because the other end
 169  *                              may not have gotten our last ACK causing it
 170  *                              to retransmit the data packet (which we ignore)
 171  *
 172  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 173  *                              us to finish writing our data and to shutdown
 174  *                              (we have to close() to move on to LAST_ACK)
 175  *
  176  *      TCP_LAST_ACK            our side has shutdown after remote has
 177  *                              shutdown.  There may still be data in our
 178  *                              buffer that we have to finish sending
 179  *              
 180  *      TCP_CLOSE               socket is finished
 181  */
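/*
 * For reference, the two usual walks through the states described
 * above (per RFC 793) are:
 *
 *      active close:   ESTABLISHED -> FIN_WAIT1 -> FIN_WAIT2 -> TIME_WAIT -> CLOSE
 *      passive close:  ESTABLISHED -> CLOSE_WAIT -> LAST_ACK -> CLOSE
 *
 * CLOSING covers the simultaneous-close case (both FINs cross in
 * flight) and, like FIN_WAIT2, leads into TIME_WAIT.
 */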
 182 #include <linux/types.h>
 183 #include <linux/sched.h>
 184 #include <linux/mm.h>
 185 #include <linux/string.h>
 186 #include <linux/config.h>
 187 #include <linux/socket.h>
 188 #include <linux/sockios.h>
 189 #include <linux/termios.h>
 190 #include <linux/in.h>
 191 #include <linux/fcntl.h>
 192 #include <linux/inet.h>
 193 #include <linux/netdevice.h>
 194 #include "snmp.h"
 195 #include "ip.h"
 196 #include "protocol.h"
 197 #include "icmp.h"
 198 #include "tcp.h"
 199 #include "arp.h"
 200 #include <linux/skbuff.h>
 201 #include "sock.h"
 202 #include "route.h"
 203 #include <linux/errno.h>
 204 #include <linux/timer.h>
 205 #include <asm/system.h>
 206 #include <asm/segment.h>
 207 #include <linux/mm.h>
 208 
 209 #undef TCP_FASTPATH
 210 
 211 #define reset_msl_timer(x,y,z)  reset_timer(x,y,z)
 212 
 213 #define SEQ_TICK 3
 214 unsigned long seq_offset;
 215 struct tcp_mib  tcp_statistics;
 216 
 217 static void tcp_close(struct sock *sk, int timeout);
 218 
 219 #ifdef TCP_FASTPATH
 220 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 221 #endif
 222 
 223 /* The less said about this the better, but it works and will do for 1.2 */
 224 
 225 static struct wait_queue *master_select_wakeup;
 226 
 227 static __inline__ int min(unsigned int a, unsigned int b)
 228 {
 229         if (a < b) 
 230                 return(a);
 231         return(b);
 232 }
 233 
 234 #undef STATE_TRACE
 235 
 236 #ifdef STATE_TRACE
 237 static char *statename[]={
 238         "Unused","Established","Syn Sent","Syn Recv",
 239         "Fin Wait 1","Fin Wait 2","Time Wait", "Close",
 240         "Close Wait","Last ACK","Listen","Closing"
 241 };
 242 #endif
 243 
 244 static __inline__ void tcp_set_state(struct sock *sk, int state)
 245 {
 246         if(sk->state==TCP_ESTABLISHED)
 247                 tcp_statistics.TcpCurrEstab--;
 248 #ifdef STATE_TRACE
 249         if(sk->debug)
 250                 printk("TCP sk=%p, State %s -> %s\n",sk, statename[sk->state],statename[state]);
 251 #endif  
  252         /* This is a hack but it doesn't occur often and it's going to
  253            be a real pain to fix nicely */
 254            
 255         if(state==TCP_ESTABLISHED && sk->state==TCP_SYN_RECV)
 256         {
 257                 wake_up_interruptible(&master_select_wakeup);
 258         }
 259         sk->state=state;
 260         if(state==TCP_ESTABLISHED)
 261                 tcp_statistics.TcpCurrEstab++;
 262 }
 263 
  264 /* This routine picks a TCP window for a socket based on
 265    the following constraints
 266    
 267    1. The window can never be shrunk once it is offered (RFC 793)
 268    2. We limit memory per socket
 269    
 270    For now we use NET2E3's heuristic of offering half the memory
 271    we have handy. All is not as bad as this seems however because
 272    of two things. Firstly we will bin packets even within the window
 273    in order to get the data we are waiting for into the memory limit.
 274    Secondly we bin common duplicate forms at receive time
 275    
 276    Better heuristics welcome
 277 */
 278    
 279 int tcp_select_window(struct sock *sk)
 280 {
 281         int new_window = sk->prot->rspace(sk);
 282         
 283         if(sk->window_clamp)
 284                 new_window=min(sk->window_clamp,new_window);
 285 /*
 286  * two things are going on here.  First, we don't ever offer a
 287  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 288  * receiver side of SWS as specified in RFC1122.
 289  * Second, we always give them at least the window they
 290  * had before, in order to avoid retracting window.  This
 291  * is technically allowed, but RFC1122 advises against it and
 292  * in practice it causes trouble.
 293  */
 294         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 295                 return(sk->window);
 296         return(new_window);
 297 }
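/*
 * A worked example of the rule above (figures are illustrative only):
 * with sk->mss = 1460 and an earlier offer of 2920 bytes, an rspace()
 * of 700 fails the min(sk->mss, MAX_WINDOW/2) test and the old 2920 is
 * re-offered; only once at least 2920 bytes (and at least one mss) of
 * receive space are free again is the larger figure advertised.  The
 * window offered to the peer therefore never shrinks.
 */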
 298 
 299 /*
 300  *      Find someone to 'accept'. Must be called with
 301  *      sk->inuse=1 or cli()
 302  */ 
 303 
 304 static struct sk_buff *tcp_find_established(struct sock *s)
 305 {
 306         struct sk_buff *p=skb_peek(&s->receive_queue);
 307         if(p==NULL)
 308                 return NULL;
 309         do
 310         {
 311                 if(p->sk->state == TCP_ESTABLISHED || p->sk->state >= TCP_FIN_WAIT1)
 312                         return p;
 313                 p=p->next;
 314         }
 315         while(p!=(struct sk_buff *)&s->receive_queue);
 316         return NULL;
 317 }
 318 
 319 
 320 /* 
 321  *      This routine closes sockets which have been at least partially
 322  *      opened, but not yet accepted. Currently it is only called by
 323  *      tcp_close, and timeout mirrors the value there. 
 324  */
 325 
 326 static void tcp_close_pending (struct sock *sk, int timeout) 
 327 {
 328         struct sk_buff *skb;
 329 
 330         while ((skb = skb_dequeue(&sk->receive_queue)) != NULL) {
 331                 tcp_close(skb->sk, timeout);
 332                 kfree_skb(skb, FREE_READ);
 333         }
 334         return;
 335 }
 336 
 337 static struct sk_buff *tcp_dequeue_established(struct sock *s)
 338 {
 339         struct sk_buff *skb;
 340         unsigned long flags;
 341         save_flags(flags);
 342         cli(); 
 343         skb=tcp_find_established(s);
 344         if(skb!=NULL)
 345                 skb_unlink(skb);        /* Take it off the queue */
 346         restore_flags(flags);
 347         return skb;
 348 }
 349 
 350 
 351 /*
 352  *      Enter the time wait state. 
 353  */
 354 
 355 static void tcp_time_wait(struct sock *sk)
 356 {
 357         tcp_set_state(sk,TCP_TIME_WAIT);
 358         sk->shutdown = SHUTDOWN_MASK;
 359         if (!sk->dead)
 360                 sk->state_change(sk);
 361         reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 362 }
 363 
 364 /*
 365  *      A socket has timed out on its send queue and wants to do a
 366  *      little retransmitting. Currently this means TCP.
 367  */
 368 
 369 void tcp_do_retransmit(struct sock *sk, int all)
 370 {
 371         struct sk_buff * skb;
 372         struct proto *prot;
 373         struct device *dev;
 374 
 375         prot = sk->prot;
 376         skb = sk->send_head;
 377 
 378         while (skb != NULL)
 379         {
 380                 struct tcphdr *th;
 381                 struct iphdr *iph;
 382                 int size;
 383 
 384                 dev = skb->dev;
 385                 IS_SKB(skb);
 386                 skb->when = jiffies;
 387 
 388                 /*
 389                  * In general it's OK just to use the old packet.  However we
 390                  * need to use the current ack and window fields.  Urg and
 391                  * urg_ptr could possibly stand to be updated as well, but we
 392                  * don't keep the necessary data.  That shouldn't be a problem,
 393                  * if the other end is doing the right thing.  Since we're
 394                  * changing the packet, we have to issue a new IP identifier.
 395                  */
 396 
 397 
 398                 iph = (struct iphdr *)(skb->data + dev->hard_header_len);
 399                 th = (struct tcphdr *)(((char *)iph) + (iph->ihl << 2));
 400                 size = skb->len - (((unsigned char *) th) - skb->data);
 401 
 402                 iph->id = htons(ip_id_count++);
 403                 ip_send_check(iph);
 404 
 405                 /*
 406                  *      This is not the right way to handle this. We have to
 407                  *      issue an up to date window and ack report with this 
 408                  *      retransmit to keep the odd buggy tcp that relies on 
 409                  *      the fact BSD does this happy. 
 410                  *      We don't however need to recalculate the entire 
 411                  *      checksum, so someone wanting a small problem to play
 412                  *      with might like to implement RFC1141/RFC1624 and speed
 413                  *      this up by avoiding a full checksum.
 414                  */
 415                  
 416                 th->ack_seq = ntohl(sk->acked_seq);
 417                 th->window = ntohs(tcp_select_window(sk));
 418                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 419                 
 420                 /*
 421                  *      If the interface is (still) up and running, kick it.
 422                  */
 423 
 424                 if (dev->flags & IFF_UP)
 425                 {
 426                         /*
 427                          *      If the packet is still being sent by the device/protocol
 428                          *      below then don't retransmit. This is both needed, and good -
 429                          *      especially with connected mode AX.25 where it stops resends
  430                          *      of a frame that has not even been sent yet!
 431                          *      We still add up the counts as the round trip time wants
 432                          *      adjusting.
 433                          */
 434                         if (sk && !skb_device_locked(skb))
 435                         {
 436                                 /* Remove it from any existing driver queue first! */
 437                                 skb_unlink(skb);
 438                                 /* Now queue it */
 439                                 ip_statistics.IpOutRequests++;
 440                                 dev_queue_xmit(skb, dev, sk->priority);
 441                         }
 442                 }
 443 
 444                 /*
 445                  *      Count retransmissions
 446                  */
 447                 sk->retransmits++;
 448                 sk->prot->retransmits ++;
 449 
 450                 /*
 451                  *      Only one retransmit requested.
 452                  */
 453                 if (!all)
 454                         break;
 455 
 456                 /*
 457                  *      This should cut it off before we send too many packets.
 458                  */
 459                 if (sk->retransmits >= sk->cong_window)
 460                         break;
 461                 skb = skb->link3;
 462         }
 463 }
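/*
 * A minimal sketch of the RFC 1141/RFC 1624 incremental update hinted
 * at in the comment inside tcp_do_retransmit() above (illustrative
 * only, not used by this file; the helper name is made up): when one
 * 16-bit word of an already checksummed segment changes from
 * 'old_word' to 'new_word', the checksum can be patched with
 * HC' = ~(~HC + ~m + m') instead of being recomputed over the whole
 * segment.
 */
static unsigned short csum_incremental_update(unsigned short check,
                unsigned short old_word, unsigned short new_word)
{
        unsigned long sum;

        sum = (unsigned long)(unsigned short)~check
            + (unsigned long)(unsigned short)~old_word
            + (unsigned long)new_word;
        sum = (sum & 0xffff) + (sum >> 16);     /* fold the carries back in */
        sum = (sum & 0xffff) + (sum >> 16);
        return (unsigned short)~sum;
}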
 464 
 465 /*
 466  *      Reset the retransmission timer
 467  */
 468  
 469 static void reset_xmit_timer(struct sock *sk, int why, unsigned long when)
 470 {
 471         del_timer(&sk->retransmit_timer);
 472         sk->ip_xmit_timeout = why;
 473         if((int)when < 0)
 474         {
 475                 when=3;
 476                 printk("Error: Negative timer in xmit_timer\n");
 477         }
 478         sk->retransmit_timer.expires=when;
 479         add_timer(&sk->retransmit_timer);
 480 }
 481 
 482 /*
 483  *      This is the normal code called for timeouts.  It does the retransmission
 484  *      and then does backoff.  tcp_do_retransmit is separated out because
 485  *      tcp_ack needs to send stuff from the retransmit queue without
 486  *      initiating a backoff.
 487  */
 488 
 489 
 490 void tcp_retransmit_time(struct sock *sk, int all)
 491 {
 492         tcp_do_retransmit(sk, all);
 493 
 494         /*
 495          * Increase the timeout each time we retransmit.  Note that
 496          * we do not increase the rtt estimate.  rto is initialized
 497          * from rtt, but increases here.  Jacobson (SIGCOMM 88) suggests
 498          * that doubling rto each time is the least we can get away with.
 499          * In KA9Q, Karn uses this for the first few times, and then
 500          * goes to quadratic.  netBSD doubles, but only goes up to *64,
 501          * and clamps at 1 to 64 sec afterwards.  Note that 120 sec is
 502          * defined in the protocol as the maximum possible RTT.  I guess
 503          * we'll have to use something other than TCP to talk to the
 504          * University of Mars.
 505          */
 506 
 507         sk->retransmits++;
 508         sk->backoff++;
 509         sk->rto = min(sk->rto << 1, 120*HZ);
 510         reset_xmit_timer(sk, TIME_WRITE, sk->rto);
 511 }
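/*
 * A worked example of the backoff above (assuming HZ = 100 and an
 * initial rto of 3*HZ): successive timeouts wait 3, 6, 12, 24, 48 and
 * 96 seconds, after which min(sk->rto << 1, 120*HZ) clamps every
 * further interval at the 120 second ceiling that the comment above
 * treats as the maximum plausible RTT.
 */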
 512 
 513 
 514 /*
  515  *      A timer event has triggered a tcp retransmit timeout. The
 516  *      socket xmit queue is ready and set up to send. Because
 517  *      the ack receive code keeps the queue straight we do
 518  *      nothing clever here.
 519  */
 520 
 521 static void tcp_retransmit(struct sock *sk, int all)
 522 {
 523         if (all) 
 524         {
 525                 tcp_retransmit_time(sk, all);
 526                 return;
 527         }
 528 
 529         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 530         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 531         sk->cong_count = 0;
 532 
 533         sk->cong_window = 1;
 534 
 535         /* Do the actual retransmit. */
 536         tcp_retransmit_time(sk, all);
 537 }
 538 
 539 /*
 540  *      The TCP retransmit timer.
 541  */
 542 
 543 
 544 
 545 static void retransmit_timer(unsigned long data)
 546 {
 547         struct sock *sk = (struct sock*)data;
 548         int why = sk->ip_xmit_timeout;
 549 
 550         /* 
 551          * only process if socket is not in use
 552          */
 553 
 554         cli();
 555         if (sk->inuse || in_bh) 
 556         {
 557                 sk->retransmit_timer.expires = 10;
  558                 add_timer(&sk->retransmit_timer);
 559                 sti();
 560                 return;
 561         }
 562 
 563         sk->inuse = 1;
 564         sti();
 565 
 566         /* Always see if we need to send an ack. */
 567 
 568         if (sk->ack_backlog && !sk->zapped) 
 569         {
 570                 sk->prot->read_wakeup (sk);
 571                 if (! sk->dead)
 572                         sk->data_ready(sk,0);
 573         }
 574 
 575         /* Now we need to figure out why the socket was on the timer. */
 576 
 577         switch (why) 
 578         {
 579                 /* Window probing */
 580                 case TIME_PROBE0:
 581                         tcp_send_probe0(sk);
 582                         release_sock (sk);
 583                         break;
 584                 /* Retransmitting */
 585                 case TIME_WRITE:
 586                         /* It could be we got here because we needed to send an ack.
 587                          * So we need to check for that.
 588                          */
 589                 {
 590                         struct sk_buff *skb;
 591                         unsigned long flags;
 592 
 593                         save_flags(flags);
 594                         cli();
 595                         skb = sk->send_head;
 596                         if (!skb) 
 597                         {
 598                                 restore_flags(flags);
 599                         } 
 600                         else 
 601                         {
 602                                 if (jiffies < skb->when + sk->rto) 
 603                                 {
 604                                         reset_xmit_timer (sk, TIME_WRITE, skb->when + sk->rto - jiffies);
 605                                         restore_flags(flags);
 606                                         release_sock (sk);
 607                                         break;
 608                                 }
 609                                 restore_flags(flags);
 610                                 /* printk("timer: seq %d retrans %d out %d cong %d\n", sk->send_head->h.seq,
 611                                         sk->retransmits, sk->packets_out, sk->cong_window); */
 612                                 sk->prot->retransmit (sk, 0);
 613                                 if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
 614                                         || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) 
 615                                 {
 616                                         arp_destroy (sk->daddr, 0);
 617                                         ip_route_check (sk->daddr);
 618                                 }
 619                                 if (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR2) 
 620                                 {
 621                                         sk->err = ETIMEDOUT;
 622                                         if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2 || sk->state == TCP_CLOSING) 
 623                                         {
 624                                                 sk->state = TCP_TIME_WAIT;
 625                                                 reset_msl_timer (sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 626                                         }
 627                                         else
 628                                         {
 629                                                 sk->prot->close (sk, 1);
 630                                                         break;
 631                                         }
 632                                 }
 633                         }
 634                         release_sock (sk);
 635                         break;
 636                 }
 637                 /* Sending Keepalives */
 638                 case TIME_KEEPOPEN:
 639                         /* 
  640                          * this reset_xmit_timer() call is a hack, this is not
 641                          * how KEEPOPEN is supposed to work.
 642                          */
 643                         reset_xmit_timer (sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
 644 
 645                         /* Send something to keep the connection open. */
 646                         if (sk->prot->write_wakeup)
 647                                   sk->prot->write_wakeup (sk);
 648                         sk->retransmits++;
 649                         if (sk->shutdown == SHUTDOWN_MASK) 
 650                         {
 651                                 sk->prot->close (sk, 1);
 652                                 sk->state = TCP_CLOSE;
 653                         }
 654                         if ((sk->state == TCP_ESTABLISHED && sk->retransmits && !(sk->retransmits & 7))
 655                                 || (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR1)) 
 656                         {
 657                                 arp_destroy (sk->daddr, 0);
 658                                 ip_route_check (sk->daddr);
 659                                 release_sock (sk);
 660                                 break;
 661                         }
 662                         if (sk->state != TCP_ESTABLISHED && sk->retransmits > TCP_RETR2) 
 663                         {
 664                                 arp_destroy (sk->daddr, 0);
 665                                 sk->err = ETIMEDOUT;
 666                                 if (sk->state == TCP_FIN_WAIT1 || sk->state == TCP_FIN_WAIT2) 
 667                                 {
 668                                         sk->state = TCP_TIME_WAIT;
 669                                         if (!sk->dead)
 670                                                 sk->state_change(sk);
 671                                         release_sock (sk);
 672                                   } 
 673                                   else 
 674                                   {
 675                                         sk->prot->close (sk, 1);
 676                                   }
 677                                   break;
 678                         }
 679                         release_sock (sk);
 680                         break;
 681                 default:
 682                         printk ("rexmit_timer: timer expired - reason unknown\n");
 683                         release_sock (sk);
 684                         break;
 685         }
 686 }
 687 
 688 /*
 689  * This routine is called by the ICMP module when it gets some
 690  * sort of error condition.  If err < 0 then the socket should
 691  * be closed and the error returned to the user.  If err > 0
 692  * it's just the icmp type << 8 | icmp code.  After adjustment
 693  * header points to the first 8 bytes of the tcp header.  We need
 694  * to find the appropriate port.
 695  */
 696 
 697 void tcp_err(int err, unsigned char *header, unsigned long daddr,
 698         unsigned long saddr, struct inet_protocol *protocol)
 699 {
 700         struct tcphdr *th;
 701         struct sock *sk;
 702         struct iphdr *iph=(struct iphdr *)header;
 703   
 704         header+=4*iph->ihl;
 705    
 706 
 707         th =(struct tcphdr *)header;
 708         sk = get_sock(&tcp_prot, th->source, daddr, th->dest, saddr);
 709 
 710         if (sk == NULL) 
 711                 return;
 712   
 713         if(err<0)
 714         {
 715                 sk->err = -err;
 716                 sk->error_report(sk);
 717                 return;
 718         }
 719 
 720         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 721         {
 722                 /*
 723                  * FIXME:
 724                  * For now we will just trigger a linear backoff.
 725                  * The slow start code should cause a real backoff here.
 726                  */
 727                 if (sk->cong_window > 4)
 728                         sk->cong_window--;
 729                 return;
 730         }
 731 
 732 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 733 
 734         /*
 735          * If we've already connected we will keep trying
 736          * until we time out, or the user gives up.
 737          */
 738 
 739         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 740         {
 741                 if (sk->state == TCP_SYN_SENT) 
 742                 {
 743                         tcp_statistics.TcpAttemptFails++;
 744                         tcp_set_state(sk,TCP_CLOSE);
 745                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 746                 }
 747                 sk->err = icmp_err_convert[err & 0xff].errno;           
 748         }
 749         return;
 750 }
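/*
 * For reference (illustrative values): with err > 0 the encoding used
 * above means an ICMP "destination unreachable / port unreachable"
 * arrives as (ICMP_DEST_UNREACH << 8) | ICMP_PORT_UNREACH = 0x0303,
 * while a source quench arrives as ICMP_SOURCE_QUENCH << 8 = 0x0400,
 * which is the case tested explicitly in tcp_err().
 */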
 751 
 752 
 753 /*
 754  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 755  *      in the received data queue (ie a frame missing that needs sending to us)
 756  */
 757 
 758 static int tcp_readable(struct sock *sk)
 759 {
 760         unsigned long counted;
 761         unsigned long amount;
 762         struct sk_buff *skb;
 763         int sum;
 764         unsigned long flags;
 765 
 766         if(sk && sk->debug)
 767                 printk("tcp_readable: %p - ",sk);
 768 
 769         save_flags(flags);
 770         cli();
 771         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 772         {
 773                 restore_flags(flags);
 774                 if(sk && sk->debug) 
 775                         printk("empty\n");
 776                 return(0);
 777         }
 778   
 779         counted = sk->copied_seq;       /* Where we are at the moment */
 780         amount = 0;
 781   
 782         /* Do until a push or until we are out of data. */
 783         do 
 784         {
 785                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 786                         break;
 787                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 788                 if (skb->h.th->syn)
 789                         sum++;
 790                 if (sum > 0) 
 791                 {                                       /* Add it up, move on */
 792                         amount += sum;
 793                         if (skb->h.th->syn) 
 794                                 amount--;
 795                         counted += sum;
 796                 }
 797                 /*
 798                  * Don't count urg data ... but do it in the right place!
 799                  * Consider: "old_data (ptr is here) URG PUSH data"
 800                  * The old code would stop at the first push because
 801                  * it counted the urg (amount==1) and then does amount--
 802                  * *after* the loop.  This means tcp_readable() always
 803                  * returned zero if any URG PUSH was in the queue, even
 804                  * though there was normal data available. If we subtract
 805                  * the urg data right here, we even get it to work for more
 806                  * than one URG PUSH skb without normal data.
 807                  * This means that select() finally works now with urg data
 808                  * in the queue.  Note that rlogin was never affected
 809                  * because it doesn't use select(); it uses two processes
 810                  * and a blocking read().  And the queue scan in tcp_read()
 811                  * was correct.  Mike <pall@rz.uni-karlsruhe.de>
 812                  */
 813                 if (skb->h.th->urg)
 814                         amount--;       /* don't count urg data */
 815                 if (amount && skb->h.th->psh) break;
 816                 skb = skb->next;
 817         }
 818         while(skb != (struct sk_buff *)&sk->receive_queue);
 819 
 820         restore_flags(flags);
 821         if(sk->debug)
 822                 printk("got %lu bytes.\n",amount);
 823         return(amount);
 824 }
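/*
 * A worked example of the count above (figures are illustrative only):
 * if copied_seq lines up with an in-order segment carrying 4 readable
 * bytes, followed by a pushed segment carrying 3 bytes of which one is
 * urgent data, the first pass adds 4, the second adds 3 and subtracts
 * the urgent byte, and the PSH with a non-zero count ends the walk, so
 * tcp_readable() reports 6 bytes.
 */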
 825 
 826 
 827 /*
 828  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 829  *      listening socket has a receive queue of sockets to accept.
 830  */
 831 
 832 static int do_tcp_select(struct sock *sk, int sel_type, select_table *wait)
 833 {
 834         switch(sel_type) 
 835         {
 836                 case SEL_IN:
 837                         if (sk->err)
 838                                 return 1;
 839                         if (sk->state == TCP_LISTEN) {
 840                                 select_wait(&master_select_wakeup,wait);
 841                                 return (tcp_find_established(sk) != NULL);
 842                         }
 843                         if (sk->state == TCP_SYN_SENT || sk->state == TCP_SYN_RECV)
 844                                 return 0;
 845                         if (sk->acked_seq != sk->copied_seq)
 846                                 return 1;
 847                         if (sk->shutdown & RCV_SHUTDOWN)
 848                                 return 1;
 849                         return 0;
 850 
 851                 case SEL_OUT:
 852                         if (sk->shutdown & SEND_SHUTDOWN) {
 853                                 /* FIXME: should this return an error? */
 854                                 return 0;
 855                         }
 856 
 857                         /*
 858                          * This is now right thanks to a small fix
 859                          * by Matt Dillon.
 860                          */
 861                         
 862                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 863                         {
 864                                 /* This should cause connect to work ok. */
 865                                 if (sk->state == TCP_SYN_RECV ||
 866                                     sk->state == TCP_SYN_SENT) return 0;
 867                                 return 1;
 868                         }
 869                         return 0;
 870 
 871                 case SEL_EX:
 872                         if (sk->err || sk->urg_data)
 873                                 return 1;
 874                         return 0;
 875         }
 876         return 0;
 877 }
 878 
 879 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
 880 {
 881         int retval;
 882 
 883         sk->inuse = 1;
 884         select_wait(sk->sleep, wait);
 885         retval = do_tcp_select(sk, sel_type, wait);
 886         release_sock(sk);
 887         return retval;
 888 }
 889 
 890 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
 891 {
 892         int err;
 893         switch(cmd) 
 894         {
 895 
 896                 case TIOCINQ:
 897 #ifdef FIXME    /* FIXME: */
 898                 case FIONREAD:
 899 #endif
 900                 {
 901                         unsigned long amount;
 902 
 903                         if (sk->state == TCP_LISTEN) 
 904                                 return(-EINVAL);
 905 
 906                         sk->inuse = 1;
 907                         amount = tcp_readable(sk);
 908                         release_sock(sk);
 909                         err=verify_area(VERIFY_WRITE,(void *)arg,
 910                                                    sizeof(unsigned long));
 911                         if(err)
 912                                 return err;
 913                         put_fs_long(amount,(unsigned long *)arg);
 914                         return(0);
 915                 }
 916                 case SIOCATMARK:
 917                 {
 918                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq;
 919 
 920                         err = verify_area(VERIFY_WRITE,(void *) arg,
 921                                                   sizeof(unsigned long));
 922                         if (err)
 923                                 return err;
 924                         put_fs_long(answ,(int *) arg);
 925                         return(0);
 926                 }
 927                 case TIOCOUTQ:
 928                 {
 929                         unsigned long amount;
 930 
 931                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 932                         amount = sk->prot->wspace(sk);
 933                         err=verify_area(VERIFY_WRITE,(void *)arg,
 934                                                    sizeof(unsigned long));
 935                         if(err)
 936                                 return err;
 937                         put_fs_long(amount,(unsigned long *)arg);
 938                         return(0);
 939                 }
 940                 default:
 941                         return(-EINVAL);
 942         }
 943 }
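/*
 * A minimal user-space sketch of the ioctls handled above (illustrative
 * only; 'fd' is assumed to be a connected TCP socket):
 *
 *      int pending, at_mark;
 *
 *      if (ioctl(fd, TIOCINQ, &pending) == 0)
 *              printf("%d bytes readable\n", pending);
 *      if (ioctl(fd, SIOCATMARK, &at_mark) == 0 && at_mark)
 *              printf("next read starts at the urgent mark\n");
 *
 * Note that TIOCOUTQ, as implemented here, reports the free write
 * space (sk->prot->wspace) rather than the number of unsent bytes.
 */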
 944 
 945 
 946 /*
 947  *      This routine computes a TCP checksum. 
 948  */
 949  
 950 unsigned short tcp_check(struct tcphdr *th, int len,
 951           unsigned long saddr, unsigned long daddr)
 952 {     
 953         unsigned long sum;
 954    
 955         if (saddr == 0) saddr = ip_my_addr();
 956 
 957 /*
 958  * stupid, gcc complains when I use just one __asm__ block,
 959  * something about too many reloads, but this is just two
 960  * instructions longer than what I want
 961  */
 962         __asm__("
 963             addl %%ecx, %%ebx
 964             adcl %%edx, %%ebx
 965             adcl $0, %%ebx
 966             "
 967         : "=b"(sum)
 968         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 969         : "bx", "cx", "dx" );
 970         __asm__("
 971             movl %%ecx, %%edx
 972             cld
 973             cmpl $32, %%ecx
 974             jb 2f
 975             shrl $5, %%ecx
 976             clc
 977 1:          lodsl
 978             adcl %%eax, %%ebx
 979             lodsl
 980             adcl %%eax, %%ebx
 981             lodsl
 982             adcl %%eax, %%ebx
 983             lodsl
 984             adcl %%eax, %%ebx
 985             lodsl
 986             adcl %%eax, %%ebx
 987             lodsl
 988             adcl %%eax, %%ebx
 989             lodsl
 990             adcl %%eax, %%ebx
 991             lodsl
 992             adcl %%eax, %%ebx
 993             loop 1b
 994             adcl $0, %%ebx
 995             movl %%edx, %%ecx
 996 2:          andl $28, %%ecx
 997             je 4f
 998             shrl $2, %%ecx
 999             clc
1000 3:          lodsl
1001             adcl %%eax, %%ebx
1002             loop 3b
1003             adcl $0, %%ebx
1004 4:          movl $0, %%eax
1005             testw $2, %%dx
1006             je 5f
1007             lodsw
1008             addl %%eax, %%ebx
1009             adcl $0, %%ebx
1010             movw $0, %%ax
1011 5:          test $1, %%edx
1012             je 6f
1013             lodsb
1014             addl %%eax, %%ebx
1015             adcl $0, %%ebx
1016 6:          movl %%ebx, %%eax
1017             shrl $16, %%eax
1018             addw %%ax, %%bx
1019             adcw $0, %%bx
1020             "
1021         : "=b"(sum)
1022         : "0"(sum), "c"(len), "S"(th)
1023         : "ax", "bx", "cx", "dx", "si" );
1024 
1025         /* We only want the bottom 16 bits, but we never cleared the top 16. */
1026   
1027         return((~sum) & 0xffff);
1028 }
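/*
 * A portable C sketch of what the assembler above computes (illustrative
 * only, not used by this file, and much slower): sum the pseudo header
 * and the TCP segment as 16-bit words in one's complement arithmetic,
 * then complement.  The result is returned in the same store-as-is form
 * that tcp_check() uses, so it could be assigned to th->check directly.
 */
static unsigned short tcp_check_portable(struct tcphdr *th, int len,
                unsigned long saddr, unsigned long daddr)
{
        unsigned long sum = 0;
        unsigned long src = ntohl(saddr);       /* addresses arrive in network order */
        unsigned long dst = ntohl(daddr);
        unsigned char *data = (unsigned char *) th;
        int i;

        /* Pseudo header: source, destination, protocol and TCP length. */
        sum += (src >> 16) & 0xffff;
        sum += src & 0xffff;
        sum += (dst >> 16) & 0xffff;
        sum += dst & 0xffff;
        sum += IPPROTO_TCP;
        sum += len;

        /* The segment itself, taken as 16-bit words in network byte order. */
        for (i = 0; i + 1 < len; i += 2)
                sum += (data[i] << 8) | data[i + 1];
        if (len & 1)
                sum += data[len - 1] << 8;      /* pad the odd byte with zero */

        /* Fold the carries back in and complement. */
        while (sum >> 16)
                sum = (sum & 0xffff) + (sum >> 16);

        return htons((unsigned short) ~sum);
}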
1029 
1030 
1031 
1032 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
1033                 unsigned long daddr, int len, struct sock *sk)
1034 {
1035         th->check = 0;
1036         th->check = tcp_check(th, len, saddr, daddr);
1037         return;
1038 }
1039 
1040 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
1041 {
1042         int size;
1043         struct tcphdr * th = skb->h.th;
1044 
1045         /* length of packet (not counting length of pre-tcp headers) */
1046         size = skb->len - ((unsigned char *) th - skb->data);
1047 
1048         /* sanity check it.. */
1049         if (size < sizeof(struct tcphdr) || size > skb->len) 
1050         {
1051                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
1052                         skb, skb->data, th, skb->len);
1053                 kfree_skb(skb, FREE_WRITE);
1054                 return;
1055         }
1056 
1057         /* If we have queued a header size packet.. */
1058         if (size == sizeof(struct tcphdr)) 
1059         {
 1060                 /* If it's got a syn or fin it's notionally included in the size..*/
1061                 if(!th->syn && !th->fin) 
1062                 {
1063                         printk("tcp_send_skb: attempt to queue a bogon.\n");
1064                         kfree_skb(skb,FREE_WRITE);
1065                         return;
1066                 }
1067         }
1068 
1069         tcp_statistics.TcpOutSegs++;  
1070 
1071         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
1072         if (after(skb->h.seq, sk->window_seq) ||
1073             (sk->retransmits && sk->ip_xmit_timeout == TIME_WRITE) ||
1074              sk->packets_out >= sk->cong_window) 
1075         {
1076                 /* checksum will be supplied by tcp_write_xmit.  So
1077                  * we shouldn't need to set it at all.  I'm being paranoid */
1078                 th->check = 0;
1079                 if (skb->next != NULL) 
1080                 {
1081                         printk("tcp_send_partial: next != NULL\n");
1082                         skb_unlink(skb);
1083                 }
1084                 skb_queue_tail(&sk->write_queue, skb);
1085                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
1086                     sk->send_head == NULL &&
1087                     sk->ack_backlog == 0)
1088                         reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
1089         } 
1090         else 
1091         {
1092                 th->ack_seq = ntohl(sk->acked_seq);
1093                 th->window = ntohs(tcp_select_window(sk));
1094 
1095                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
1096 
1097                 sk->sent_seq = sk->write_seq;
1098                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
1099                 reset_xmit_timer(sk, TIME_WRITE, sk->rto);
1100         }
1101 }
1102 
1103 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
1104 {
1105         struct sk_buff * skb;
1106         unsigned long flags;
1107 
1108         save_flags(flags);
1109         cli();
1110         skb = sk->partial;
1111         if (skb) {
1112                 sk->partial = NULL;
1113                 del_timer(&sk->partial_timer);
1114         }
1115         restore_flags(flags);
1116         return skb;
1117 }
1118 
1119 static void tcp_send_partial(struct sock *sk)
1120 {
1121         struct sk_buff *skb;
1122 
1123         if (sk == NULL)
1124                 return;
1125         while ((skb = tcp_dequeue_partial(sk)) != NULL)
1126                 tcp_send_skb(sk, skb);
1127 }
1128 
1129 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
1130 {
1131         struct sk_buff * tmp;
1132         unsigned long flags;
1133 
1134         save_flags(flags);
1135         cli();
1136         tmp = sk->partial;
1137         if (tmp)
1138                 del_timer(&sk->partial_timer);
1139         sk->partial = skb;
1140         init_timer(&sk->partial_timer);
1141         sk->partial_timer.expires = HZ;
1142         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
1143         sk->partial_timer.data = (unsigned long) sk;
1144         add_timer(&sk->partial_timer);
1145         restore_flags(flags);
1146         if (tmp)
1147                 tcp_send_skb(sk, tmp);
1148 }
1149 
1150 
1151 /*
1152  *      This routine sends an ack and also updates the window. 
1153  */
1154  
1155 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
1156              struct sock *sk,
1157              struct tcphdr *th, unsigned long daddr)
1158 {
1159         struct sk_buff *buff;
1160         struct tcphdr *t1;
1161         struct device *dev = NULL;
1162         int tmp;
1163 
1164         if(sk->zapped)
1165                 return;         /* We have been reset, we may not send again */
1166         /*
1167          * We need to grab some memory, and put together an ack,
1168          * and then put it into the queue to be sent.
1169          */
1170 
1171         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
1172         if (buff == NULL) 
1173         {
1174                 /* Force it to send an ack. */
1175                 sk->ack_backlog++;
1176                 if (sk->ip_xmit_timeout != TIME_WRITE && tcp_connected(sk->state)) 
1177                 {
1178                         reset_xmit_timer(sk, TIME_WRITE, 10);
1179                 }
1180                 return;
1181         }
1182 
1183         buff->len = sizeof(struct tcphdr);
1184         buff->sk = sk;
1185         buff->localroute = sk->localroute;
1186         t1 =(struct tcphdr *) buff->data;
1187 
1188         /* Put in the IP header and routing stuff. */
1189         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
1190                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1191         if (tmp < 0) 
1192         {
1193                 buff->free = 1;
1194                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1195                 return;
1196         }
1197         buff->len += tmp;
1198         t1 =(struct tcphdr *)((char *)t1 +tmp);
1199 
1200         /* FIXME: */
1201         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
1202 
1203         /*
1204          *      Swap the send and the receive. 
1205          */
1206          
1207         t1->dest = th->source;
1208         t1->source = th->dest;
1209         t1->seq = ntohl(sequence);
1210         t1->ack = 1;
1211         sk->window = tcp_select_window(sk);
1212         t1->window = ntohs(sk->window);
1213         t1->res1 = 0;
1214         t1->res2 = 0;
1215         t1->rst = 0;
1216         t1->urg = 0;
1217         t1->syn = 0;
1218         t1->psh = 0;
1219         t1->fin = 0;
1220         if (ack == sk->acked_seq) 
1221         {
1222                 sk->ack_backlog = 0;
1223                 sk->bytes_rcv = 0;
1224                 sk->ack_timed = 0;
1225                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
1226                                   && sk->ip_xmit_timeout == TIME_WRITE) 
1227                 {
1228                         if(sk->keepopen) {
1229                                 reset_xmit_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
1230                         } else {
1231                                 delete_timer(sk);
1232                         }
1233                 }
1234         }
1235         t1->ack_seq = ntohl(ack);
1236         t1->doff = sizeof(*t1)/4;
1237         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
1238         if (sk->debug)
1239                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
1240         tcp_statistics.TcpOutSegs++;
1241         sk->prot->queue_xmit(sk, dev, buff, 1);
1242 }
1243 
1244 
1245 /* 
1246  *      This routine builds a generic TCP header. 
1247  */
1248  
1249 extern __inline int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
1250 {
1251 
1252         /* FIXME: want to get rid of this. */
1253         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
1254         th->seq = htonl(sk->write_seq);
1255         th->psh =(push == 0) ? 1 : 0;
1256         th->doff = sizeof(*th)/4;
1257         th->ack = 1;
1258         th->fin = 0;
1259         sk->ack_backlog = 0;
1260         sk->bytes_rcv = 0;
1261         sk->ack_timed = 0;
1262         th->ack_seq = htonl(sk->acked_seq);
1263         sk->window = tcp_select_window(sk);
1264         th->window = htons(sk->window);
1265 
1266         return(sizeof(*th));
1267 }
1268 
1269 /*
1270  *      This routine copies from a user buffer into a socket,
1271  *      and starts the transmit system.
1272  */
1273 
1274 static int tcp_write(struct sock *sk, unsigned char *from,
1275           int len, int nonblock, unsigned flags)
1276 {
1277         int copied = 0;
1278         int copy;
1279         int tmp;
1280         struct sk_buff *skb;
1281         struct sk_buff *send_tmp;
1282         unsigned char *buff;
1283         struct proto *prot;
1284         struct device *dev = NULL;
1285 
1286         sk->inuse=1;
1287         prot = sk->prot;
1288         while(len > 0) 
1289         {
1290                 if (sk->err) 
1291                 {                       /* Stop on an error */
1292                         release_sock(sk);
1293                         if (copied) 
1294                                 return(copied);
1295                         tmp = -sk->err;
1296                         sk->err = 0;
1297                         return(tmp);
1298                 }
1299 
1300         /*
1301          *      First thing we do is make sure that we are established. 
1302          */
1303         
1304                 if (sk->shutdown & SEND_SHUTDOWN) 
1305                 {
1306                         release_sock(sk);
1307                         sk->err = EPIPE;
1308                         if (copied) 
1309                                 return(copied);
1310                         sk->err = 0;
1311                         return(-EPIPE);
1312                 }
1313 
1314 
1315         /* 
1316          *      Wait for a connection to finish.
1317          */
1318         
1319                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
1320                 {
1321                         if (sk->err) 
1322                         {
1323                                 release_sock(sk);
1324                                 if (copied) 
1325                                         return(copied);
1326                                 tmp = -sk->err;
1327                                 sk->err = 0;
1328                                 return(tmp);
1329                         }
1330 
1331                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
1332                         {
1333                                 release_sock(sk);
1334                                 if (copied) 
1335                                         return(copied);
1336 
1337                                 if (sk->err) 
1338                                 {
1339                                         tmp = -sk->err;
1340                                         sk->err = 0;
1341                                         return(tmp);
1342                                 }
1343 
1344                                 if (sk->keepopen) 
1345                                 {
1346                                         send_sig(SIGPIPE, current, 0);
1347                                 }
1348                                 return(-EPIPE);
1349                         }
1350 
1351                         if (nonblock || copied) 
1352                         {
1353                                 release_sock(sk);
1354                                 if (copied) 
1355                                         return(copied);
1356                                 return(-EAGAIN);
1357                         }
1358 
1359                         release_sock(sk);
1360                         cli();
1361                 
1362                         if (sk->state != TCP_ESTABLISHED &&
1363                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
1364                         {
1365                                 interruptible_sleep_on(sk->sleep);
1366                                 if (current->signal & ~current->blocked) 
1367                                 {
1368                                         sti();
1369                                         if (copied) 
1370                                                 return(copied);
1371                                         return(-ERESTARTSYS);
1372                                 }
1373                         }
1374                         sk->inuse = 1;
1375                         sti();
1376                 }
1377 
1378         /*
1379          * The following code can result in copy <= 0 if sk->mss is ever
1380          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1381          * sk->mtu is constant once SYN processing is finished.  I.e. we
1382          * had better not get here until we've seen his SYN and at least one
1383          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1384          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1385          * non-decreasing.  Note that any ioctl to set user_mss must be done
1386          * before the exchange of SYN's.  If the initial ack from the other
1387          * end has a window of 0, max_window and thus mss will both be 0.
1388          */
1389 
1390         /* 
1391          *      Now we need to check if we have a half built packet. 
1392          */
1393 
1394                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1395                 {
1396                         int hdrlen;
1397 
1398                          /* IP header + TCP header */
1399                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1400                                  + sizeof(struct tcphdr);
1401         
1402                         /* Add more stuff to the end of skb->len */
1403                         if (!(flags & MSG_OOB)) 
1404                         {
1405                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1406                                 /* FIXME: this is really a bug. */
1407                                 if (copy <= 0) 
1408                                 {
1409                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1410                                         copy = 0;
1411                                 }
1412           
1413                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1414                                 skb->len += copy;
1415                                 from += copy;
1416                                 copied += copy;
1417                                 len -= copy;
1418                                 sk->write_seq += copy;
1419                         }
1420                         if ((skb->len - hdrlen) >= sk->mss ||
1421                                 (flags & MSG_OOB) || !sk->packets_out)
1422                                 tcp_send_skb(sk, skb);
1423                         else
1424                                 tcp_enqueue_partial(skb, sk);
1425                         continue;
1426                 }
1427 
1428         /*
1429          * We also need to worry about the window.
1430          * If window < 1/2 the maximum window we've seen from this
1431          *   host, don't use it.  This is sender side
1432          *   silly window prevention, as specified in RFC1122.
1433          *   (Note that this is different from earlier versions of
1434          *   SWS prevention, e.g. RFC 813.)  What we actually do is 
1435          *   use the whole MSS.  Since this results in the right
1436          *   edge of the packet being outside the window, it will
1437          *   be queued for later rather than sent.
1438          */
1439 
1440                 copy = sk->window_seq - sk->write_seq;
1441                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1442                         copy = sk->mss;
1443                 if (copy > len)
1444                         copy = len;
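                /*
                 *      Worked example (numbers are illustrative only): say
                 *      sk->max_window is 4096, sk->mss is 1436, only 200
                 *      bytes of send window remain and at least an MSS of
                 *      user data is left to copy.  200 < 4096/2, so the
                 *      test above pushes copy back up to the full MSS of
                 *      1436; the segment's right edge then falls beyond the
                 *      offered window, so it is queued for later rather
                 *      than sent -- the sender-side SWS avoidance described
                 *      in the comment above.
                 */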
1445 
1446         /*
1447          *      We should really check the window here also. 
1448          */
1449          
1450                 send_tmp = NULL;
1451                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1452                 {
1453                         /*
1454                          *      We will release the socket in case we sleep here. 
1455                          */
1456                         release_sock(sk);
1457                         /*
1458                          *      NB: following must be mtu, because mss can be increased.
1459                          *      mss is always <= mtu 
1460                          */
1461                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1462                         sk->inuse = 1;
1463                         send_tmp = skb;
1464                 } 
1465                 else 
1466                 {
1467                         /*
1468                          *      We will release the socket in case we sleep here. 
1469                          */
1470                         release_sock(sk);
1471                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1472                         sk->inuse = 1;
1473                 }
1474 
1475                 /*
1476                  *      If we didn't get any memory, we need to sleep. 
1477                  */
1478 
1479                 if (skb == NULL) 
1480                 {
1481                         sk->socket->flags |= SO_NOSPACE;
1482                         if (nonblock) 
1483                         {
1484                                 release_sock(sk);
1485                                 if (copied) 
1486                                         return(copied);
1487                                 return(-EAGAIN);
1488                         }
1489 
1490                         /*
1491                          *      FIXME: here is another race condition. 
1492                          */
1493 
1494                         tmp = sk->wmem_alloc;
1495                         release_sock(sk);
1496                         cli();
1497                         /*
1498                          *      Again we will try to avoid it. 
1499                          */
1500                         if (tmp <= sk->wmem_alloc &&
1501                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1502                                 && sk->err == 0) 
1503                         {
1504                                 sk->socket->flags &= ~SO_NOSPACE;
1505                                 interruptible_sleep_on(sk->sleep);
1506                                 if (current->signal & ~current->blocked) 
1507                                 {
1508                                         sti();
1509                                         if (copied) 
1510                                                 return(copied);
1511                                         return(-ERESTARTSYS);
1512                                 }
1513                         }
1514                         sk->inuse = 1;
1515                         sti();
1516                         continue;
1517                 }
1518 
1519                 skb->len = 0;
1520                 skb->sk = sk;
1521                 skb->free = 0;
1522                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1523         
1524                 buff = skb->data;
1525         
1526                 /*
1527                  * FIXME: we need to optimize this.
1528                  * Perhaps some hints here would be good.
1529                  */
1530                 
1531                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1532                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1533                 if (tmp < 0 ) 
1534                 {
1535                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1536                         release_sock(sk);
1537                         if (copied) 
1538                                 return(copied);
1539                         return(tmp);
1540                 }
1541                 skb->len += tmp;
1542                 skb->dev = dev;
1543                 buff += tmp;
1544                 skb->h.th =(struct tcphdr *) buff;
1545                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1546                 if (tmp < 0) 
1547                 {
1548                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1549                         release_sock(sk);
1550                         if (copied) 
1551                                 return(copied);
1552                         return(tmp);
1553                 }
1554 
1555                 if (flags & MSG_OOB) 
1556                 {
1557                         ((struct tcphdr *)buff)->urg = 1;
1558                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1559                 }
1560                 skb->len += tmp;
1561                 memcpy_fromfs(buff+tmp, from, copy);
1562 
1563                 from += copy;
1564                 copied += copy;
1565                 len -= copy;
1566                 skb->len += copy;
1567                 skb->free = 0;
1568                 sk->write_seq += copy;
1569         
1570                 if (send_tmp != NULL && sk->packets_out) 
1571                 {
1572                         tcp_enqueue_partial(send_tmp, sk);
1573                         continue;
1574                 }
1575                 tcp_send_skb(sk, skb);
1576         }
1577         sk->err = 0;
1578 
1579 /*
1580  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1581  *      interactive fast network servers. It's meant to be on and
1582  *      it really improves the throughput though not the echo time
1583  *      on my slow slip link - Alan
1584  */
1585 
1586 /*
1587  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1588  */
1589  
1590         if(sk->partial && ((!sk->packets_out) 
1591      /* If not nagling we can send on the before case too.. */
1592               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1593         ))
1594                 tcp_send_partial(sk);
1595 
1596         release_sock(sk);
1597         return(copied);
1598 }
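
/*
 *      Illustrative user-space sketch -- not part of this file.  The Nagle
 *      behaviour described above is controlled from applications with the
 *      TCP_NODELAY socket option (handled by tcp_setsockopt() later in this
 *      file), which ultimately controls the sk->nonagle flag tested above.
 *      disable_nagle() is an example name only; error handling is reduced
 *      to a perror() for brevity.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/tcp.h>

static int disable_nagle(int fd)
{
        int one = 1;

        /* Ask TCP to send small segments immediately instead of holding
           them back while earlier data is still unacknowledged. */
        if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, &one, sizeof(one)) < 0) {
                perror("setsockopt(TCP_NODELAY)");
                return -1;
        }
        return 0;
}
#endif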
1599 
1600 
1601 static int tcp_sendto(struct sock *sk, unsigned char *from,
1602            int len, int nonblock, unsigned flags,
1603            struct sockaddr_in *addr, int addr_len)
1604 {
1605         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1606                 return -EINVAL;
1607         if (sk->state == TCP_CLOSE)
1608                 return -ENOTCONN;
1609         if (addr_len < sizeof(*addr))
1610                 return -EINVAL;
1611         if (addr->sin_family && addr->sin_family != AF_INET) 
1612                 return -EINVAL;
1613         if (addr->sin_port != sk->dummy_th.dest) 
1614                 return -EISCONN;
1615         if (addr->sin_addr.s_addr != sk->daddr) 
1616                 return -EISCONN;
1617         return tcp_write(sk, from, len, nonblock, flags);
1618 }
1619 
1620 
1621 static void tcp_read_wakeup(struct sock *sk)
1622 {
1623         int tmp;
1624         struct device *dev = NULL;
1625         struct tcphdr *t1;
1626         struct sk_buff *buff;
1627 
1628         if (!sk->ack_backlog) 
1629                 return;
1630 
1631         /*
1632          * FIXME: we need to put code here to prevent this routine from
1633          * being called.  Being called once in a while is ok, so only check
1634          * if this is the second time in a row.
1635          */
1636 
1637         /*
1638          * We need to grab some memory, and put together an ack,
1639          * and then put it into the queue to be sent.
1640          */
1641 
1642         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1643         if (buff == NULL) 
1644         {
1645                 /* Try again real soon. */
1646                 reset_xmit_timer(sk, TIME_WRITE, 10);
1647                 return;
1648         }
1649 
1650         buff->len = sizeof(struct tcphdr);
1651         buff->sk = sk;
1652         buff->localroute = sk->localroute;
1653         
1654         /*
1655          *      Put in the IP header and routing stuff. 
1656          */
1657 
1658         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1659                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1660         if (tmp < 0) 
1661         {
1662                 buff->free = 1;
1663                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1664                 return;
1665         }
1666 
1667         buff->len += tmp;
1668         t1 =(struct tcphdr *)(buff->data +tmp);
1669 
1670         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1671         t1->seq = htonl(sk->sent_seq);
1672         t1->ack = 1;
1673         t1->res1 = 0;
1674         t1->res2 = 0;
1675         t1->rst = 0;
1676         t1->urg = 0;
1677         t1->syn = 0;
1678         t1->psh = 0;
1679         sk->ack_backlog = 0;
1680         sk->bytes_rcv = 0;
1681         sk->window = tcp_select_window(sk);
1682         t1->window = ntohs(sk->window);
1683         t1->ack_seq = ntohl(sk->acked_seq);
1684         t1->doff = sizeof(*t1)/4;
1685         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1686         sk->prot->queue_xmit(sk, dev, buff, 1);
1687         tcp_statistics.TcpOutSegs++;
1688 }
1689 
1690 
1691 /*
1692  *      FIXME:
1693  *      This routine frees used buffers.
1694  *      It should consider sending an ACK to let the
1695  *      other end know we now have a bigger window.
1696  */
1697 
1698 static void cleanup_rbuf(struct sock *sk)
1699 {
1700         unsigned long flags;
1701         unsigned long left;
1702         struct sk_buff *skb;
1703         unsigned long rspace;
1704 
1705         if(sk->debug)
1706                 printk("cleaning rbuf for sk=%p\n", sk);
1707   
1708         save_flags(flags);
1709         cli();
1710   
1711         left = sk->prot->rspace(sk);
1712  
1713         /*
1714          * We have to loop through all the buffer headers,
1715          * and try to free up all the space we can.
1716          */
1717 
1718         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1719         {
1720                 if (!skb->used) 
1721                         break;
1722                 skb_unlink(skb);
1723                 skb->sk = sk;
1724                 kfree_skb(skb, FREE_READ);
1725         }
1726 
1727         restore_flags(flags);
1728 
1729         /*
1730          * FIXME:
1731          * At this point we should send an ack if the difference
1732          * in the window, and the amount of space is bigger than
1733          * TCP_WINDOW_DIFF.
1734          */
1735 
1736         if(sk->debug)
1737                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1738                                             left);
1739         if ((rspace=sk->prot->rspace(sk)) != left) 
1740         {
1741                 /*
1742                  * This area has caused the most trouble.  The current strategy
1743                  * is to simply do nothing if the other end has room to send at
1744                  * least 3 full packets, because the ack from those will auto-
1745                  * matically update the window.  If the other end doesn't think
1746                  * we have much space left, but we have room for at least 1 more
1747                  * complete packet than it thinks we do, we will send an ack
1748                  * immediately.  Otherwise we will wait up to .5 seconds in case
1749                  * the user reads some more.
1750                  */
1751                 sk->ack_backlog++;
1752         /*
1753          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1754          * if the other end is offering a window smaller than the agreed on MSS
1755          * (called sk->mtu here).  In theory there's no connection between send
1756          * and receive, and so no reason to think that they're going to send
1757          * small packets.  For the moment I'm using the hack of reducing the mss
1758          * only on the send side, so I'm putting mtu here.
1759          */
1760 
1761                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1762                 {
1763                         /* Send an ack right now. */
1764                         tcp_read_wakeup(sk);
1765                 } 
1766                 else 
1767                 {
1768                         /* Force it to send an ack soon. */
1769                         int was_active = del_timer(&sk->retransmit_timer);
1770                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1771                         {
1772                                 reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1773                         } 
1774                         else
1775                                 add_timer(&sk->retransmit_timer);
1776                 }
1777         }
1778 } 
1779 
1780 
1781 /*
1782  *      Handle reading urgent data. 
1783  */
1784  
1785 static int tcp_read_urg(struct sock * sk, int nonblock,
1786              unsigned char *to, int len, unsigned flags)
1787 {
1788         if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1789                 return -EINVAL;
1790         if (sk->err) 
1791         {
1792                 int tmp = -sk->err;
1793                 sk->err = 0;
1794                 return tmp;
1795         }
1796 
1797         if (sk->state == TCP_CLOSE || sk->done) 
1798         {
1799                 if (!sk->done) {
1800                         sk->done = 1;
1801                         return 0;
1802                 }
1803                 return -ENOTCONN;
1804         }
1805 
1806         if (sk->shutdown & RCV_SHUTDOWN) 
1807         {
1808                 sk->done = 1;
1809                 return 0;
1810         }
1811         sk->inuse = 1;
1812         if (sk->urg_data & URG_VALID) 
1813         {
1814                 char c = sk->urg_data;
1815                 if (!(flags & MSG_PEEK))
1816                         sk->urg_data = URG_READ;
1817                 put_fs_byte(c, to);
1818                 release_sock(sk);
1819                 return 1;
1820         }
1821         release_sock(sk);
1822         
1823         /*
1824          * Fixed the recv(..., MSG_OOB) behaviour.  BSD docs and
1825          * the available implementations agree in this case:
1826          * this call should never block, independent of the
1827          * blocking state of the socket.
1828          * Mike <pall@rz.uni-karlsruhe.de>
1829          */
1830         return -EAGAIN;
1831 }
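
/*
 *      Illustrative user-space sketch -- not part of this file.  Urgent
 *      ("out of band") data is sent with MSG_OOB and read back with
 *      recv(..., MSG_OOB).  As the comment above notes, the read never
 *      blocks: with this implementation it fails with EINVAL when no
 *      urgent data has been signalled, or EAGAIN when the urgent byte has
 *      been announced but not yet received.  send_urgent()/read_urgent()
 *      are example names only.
 */
#if 0
#include <sys/socket.h>

/* Sender side: mark a single byte as urgent. */
static int send_urgent(int fd, char c)
{
        return send(fd, &c, 1, MSG_OOB);
}

/* Receiver side: poll for the urgent byte; never blocks (see above). */
static int read_urgent(int fd, char *c)
{
        return recv(fd, c, 1, MSG_OOB);
}
#endif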
1832 
1833 
1834 /*
1835  *      This routine copies from a sock struct into the user buffer. 
1836  */
1837  
1838 static int tcp_read(struct sock *sk, unsigned char *to,
1839         int len, int nonblock, unsigned flags)
1840 {
1841         struct wait_queue wait = { current, NULL };
1842         int copied = 0;
1843         unsigned long peek_seq;
1844         unsigned long *seq;
1845         unsigned long used;
1846 
1847         /* This error should be checked. */
1848         if (sk->state == TCP_LISTEN)
1849                 return -ENOTCONN;
1850 
1851         /* Urgent data needs to be handled specially. */
1852         if (flags & MSG_OOB)
1853                 return tcp_read_urg(sk, nonblock, to, len, flags);
1854 
1855         peek_seq = sk->copied_seq;
1856         seq = &sk->copied_seq;
1857         if (flags & MSG_PEEK)
1858                 seq = &peek_seq;
1859 
1860         add_wait_queue(sk->sleep, &wait);
1861         sk->inuse = 1;
1862         while (len > 0) 
1863         {
1864                 struct sk_buff * skb;
1865                 unsigned long offset;
1866         
1867                 /*
1868                  * Are we at urgent data? Stop if we have read anything.
1869                  */
1870                 if (copied && sk->urg_data && sk->urg_seq == *seq)
1871                         break;
1872 
1873                 current->state = TASK_INTERRUPTIBLE;
1874 
1875                 skb = skb_peek(&sk->receive_queue);
1876                 do 
1877                 {
1878                         if (!skb)
1879                                 break;
1880                         if (before(*seq, skb->h.th->seq))
1881                                 break;
1882                         offset = *seq - skb->h.th->seq;
1883                         if (skb->h.th->syn)
1884                                 offset--;
1885                         if (offset < skb->len)
1886                                 goto found_ok_skb;
1887                         if (skb->h.th->fin)
1888                                 goto found_fin_ok;
1889                         if (!(flags & MSG_PEEK))
1890                                 skb->used = 1;
1891                         skb = skb->next;
1892                 }
1893                 while (skb != (struct sk_buff *)&sk->receive_queue);
1894 
1895                 if (copied)
1896                         break;
1897 
1898                 if (sk->err) 
1899                 {
1900                         copied = -sk->err;
1901                         sk->err = 0;
1902                         break;
1903                 }
1904 
1905                 if (sk->state == TCP_CLOSE) 
1906                 {
1907                         if (!sk->done) 
1908                         {
1909                                 sk->done = 1;
1910                                 break;
1911                         }
1912                         copied = -ENOTCONN;
1913                         break;
1914                 }
1915 
1916                 if (sk->shutdown & RCV_SHUTDOWN) 
1917                 {
1918                         sk->done = 1;
1919                         break;
1920                 }
1921                         
1922                 if (nonblock) 
1923                 {
1924                         copied = -EAGAIN;
1925                         break;
1926                 }
1927 
1928                 cleanup_rbuf(sk);
1929                 release_sock(sk);
1930                 sk->socket->flags |= SO_WAITDATA;
1931                 schedule();
1932                 sk->socket->flags &= ~SO_WAITDATA;
1933                 sk->inuse = 1;
1934 
1935                 if (current->signal & ~current->blocked) 
1936                 {
1937                         copied = -ERESTARTSYS;
1938                         break;
1939                 }
1940                 continue;
1941 
1942         found_ok_skb:
1943                 /* Ok so how much can we use ? */
1944                 used = skb->len - offset;
1945                 if (len < used)
1946                         used = len;
1947                 /* do we have urgent data here? */
1948                 if (sk->urg_data) 
1949                 {
1950                         unsigned long urg_offset = sk->urg_seq - *seq;
1951                         if (urg_offset < used) 
1952                         {
1953                                 if (!urg_offset) 
1954                                 {
1955                                         if (!sk->urginline) 
1956                                         {
1957                                                 ++*seq;
1958                                                 offset++;
1959                                                 used--;
1960                                         }
1961                                 }
1962                                 else
1963                                         used = urg_offset;
1964                         }
1965                 }
1966                 /* Copy it */
1967                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1968                         skb->h.th->doff*4 + offset, used);
1969                 copied += used;
1970                 len -= used;
1971                 to += used;
1972                 *seq += used;
1973                 if (after(sk->copied_seq,sk->urg_seq))
1974                         sk->urg_data = 0;
1975                 if (used + offset < skb->len)
1976                         continue;
1977                 if (skb->h.th->fin)
1978                         goto found_fin_ok;
1979                 if (flags & MSG_PEEK)
1980                         continue;
1981                 skb->used = 1;
1982                 continue;
1983 
1984         found_fin_ok:
1985                 ++*seq;
1986                 if (flags & MSG_PEEK)
1987                         break;
1988                 skb->used = 1;
1989                 sk->shutdown |= RCV_SHUTDOWN;
1990                 break;
1991 
1992         }
1993         remove_wait_queue(sk->sleep, &wait);
1994         current->state = TASK_RUNNING;
1995 
1996         /* Clean up data we have read: This will do ACK frames */
1997         cleanup_rbuf(sk);
1998         release_sock(sk);
1999         return copied;
2000 }
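
/*
 *      Illustrative user-space sketch -- not part of this file.  The
 *      MSG_PEEK handling above (copying through a local peek_seq rather
 *      than advancing sk->copied_seq) is what lets an application look at
 *      queued data without consuming it.  peek_bytes() is an example name
 *      only.
 */
#if 0
#include <sys/socket.h>

/* Peek at up to len queued bytes; a later recv() without MSG_PEEK
   returns the same bytes again. */
static int peek_bytes(int fd, char *buf, int len)
{
        return recv(fd, buf, len, MSG_PEEK);
}
#endif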
2001 
2002  
2003 /*
2004  *      Shutdown the sending side of a connection.
2005  */
2006 
2007 void tcp_shutdown(struct sock *sk, int how)
2008 {
2009         struct sk_buff *buff;
2010         struct tcphdr *t1, *th;
2011         struct proto *prot;
2012         int tmp;
2013         struct device *dev = NULL;
2014 
2015         /*
2016          * We need to grab some memory, and put together a FIN,
2017          * and then put it into the queue to be sent.
2018          * FIXME:
2019          *
2020          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
2021          *      Most of this is guesswork, so maybe it will work...
2022          */
2023 
2024         if (!(how & SEND_SHUTDOWN)) 
2025                 return;
2026          
2027         /*
2028          *      If we've already sent a FIN, return. 
2029          */
2030          
2031         if (sk->state == TCP_FIN_WAIT1 ||
2032             sk->state == TCP_FIN_WAIT2 ||
2033             sk->state == TCP_CLOSING ||
2034             sk->state == TCP_LAST_ACK ||
2035             sk->state == TCP_TIME_WAIT
2036         ) 
2037         {
2038                 return;
2039         }
2040         sk->inuse = 1;
2041 
2042         /*
2043          * flag that the sender has shut down
2044          */
2045 
2046         sk->shutdown |= SEND_SHUTDOWN;
2047 
2048         /*
2049          *  Clear out any half completed packets. 
2050          */
2051 
2052         if (sk->partial)
2053                 tcp_send_partial(sk);
2054 
2055         prot =(struct proto *)sk->prot;
2056         th =(struct tcphdr *)&sk->dummy_th;
2057         release_sock(sk); /* in case the malloc sleeps. */
2058         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
2059         if (buff == NULL)
2060                 return;
2061         sk->inuse = 1;
2062 
2063         buff->sk = sk;
2064         buff->len = sizeof(*t1);
2065         buff->localroute = sk->localroute;
2066         t1 =(struct tcphdr *) buff->data;
2067 
2068         /*
2069          *      Put in the IP header and routing stuff. 
2070          */
2071 
2072         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2073                            IPPROTO_TCP, sk->opt,
2074                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2075         if (tmp < 0) 
2076         {
2077                 /*
2078                  *      Finish anyway, treat this as a send that got lost. 
2079                  *
2080                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
2081                  *      written data to be completely acknowledged along
2082                  *      with an acknowledge to our FIN.
2083                  *
2084                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
2085                  *      connection established.
2086                  */
2087                 buff->free = 1;
2088                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
2089 
2090                 if (sk->state == TCP_ESTABLISHED)
2091                         tcp_set_state(sk,TCP_FIN_WAIT1);
2092                 else if(sk->state == TCP_CLOSE_WAIT)
2093                         tcp_set_state(sk,TCP_LAST_ACK);
2094                 else
2095                         tcp_set_state(sk,TCP_FIN_WAIT2);
2096 
2097                 release_sock(sk);
2098                 return;
2099         }
2100 
2101         t1 =(struct tcphdr *)((char *)t1 +tmp);
2102         buff->len += tmp;
2103         buff->dev = dev;
2104         memcpy(t1, th, sizeof(*t1));
2105         t1->seq = ntohl(sk->write_seq);
2106         sk->write_seq++;
2107         buff->h.seq = sk->write_seq;
2108         t1->ack = 1;
2109         t1->ack_seq = ntohl(sk->acked_seq);
2110         t1->window = ntohs(sk->window=tcp_select_window(sk));
2111         t1->fin = 1;
2112         t1->rst = 0;
2113         t1->doff = sizeof(*t1)/4;
2114         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2115 
2116         /*
2117          * If there is data in the write queue, the fin must be appended to
2118          * the write queue.
2119          */
2120         
2121         if (skb_peek(&sk->write_queue) != NULL) 
2122         {
2123                 buff->free = 0;
2124                 if (buff->next != NULL) 
2125                 {
2126                         printk("tcp_shutdown: next != NULL\n");
2127                         skb_unlink(buff);
2128                 }
2129                 skb_queue_tail(&sk->write_queue, buff);
2130         } 
2131         else 
2132         {
2133                 sk->sent_seq = sk->write_seq;
2134                 sk->prot->queue_xmit(sk, dev, buff, 0);
2135                 reset_xmit_timer(sk, TIME_WRITE, sk->rto);
2136         }
2137 
2138         if (sk->state == TCP_ESTABLISHED) 
2139                 tcp_set_state(sk,TCP_FIN_WAIT1);
2140         else if (sk->state == TCP_CLOSE_WAIT)
2141                 tcp_set_state(sk,TCP_LAST_ACK);
2142         else
2143                 tcp_set_state(sk,TCP_FIN_WAIT2);
2144 
2145         release_sock(sk);
2146 }
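
/*
 *      Illustrative user-space sketch -- not part of this file.  The code
 *      above is reached, via the socket layer, when an application
 *      half-closes a connection: sending stops and a FIN is queued, while
 *      the receive side stays usable until the peer closes in turn.  The
 *      argument value 1 ("no further sends") is the classic BSD encoding;
 *      half_close() is an example name only.
 */
#if 0
#include <stdio.h>
#include <sys/socket.h>

/* Tell the peer we are done sending but keep reading its reply. */
static int half_close(int fd)
{
        if (shutdown(fd, 1) < 0) {      /* 1 == SHUT_WR on current systems */
                perror("shutdown");
                return -1;
        }
        return 0;
}
#endif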
2147 
2148 
2149 static int
2150 tcp_recvfrom(struct sock *sk, unsigned char *to,
2151              int to_len, int nonblock, unsigned flags,
2152              struct sockaddr_in *addr, int *addr_len)
2153 {
2154         int result;
2155   
2156         /* 
2157          *      Have to check these first, unlike the old code. If
2158          *      we checked them after the read we could lose data on
2159          *      an error, which would be wrong.
2160          */
2161 
2162         if(addr_len)
2163                 *addr_len = sizeof(*addr);
2164         result=tcp_read(sk, to, to_len, nonblock, flags);
2165 
2166         if (result < 0) 
2167                 return(result);
2168   
2169         if(addr)
2170         {
2171                 addr->sin_family = AF_INET;
2172                 addr->sin_port = sk->dummy_th.dest;
2173                 addr->sin_addr.s_addr = sk->daddr;
2174         }
2175         return(result);
2176 }
2177 
2178 
2179 /*
2180  *      This routine will send an RST to the other tcp. 
2181  */
2182  
2183 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
2184           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
2185 {
2186         struct sk_buff *buff;
2187         struct tcphdr *t1;
2188         int tmp;
2189         struct device *ndev=NULL;
2190 
2191         /*
2192          *      Cannot reset a reset (Think about it).
2193          */
2194          
2195         if(th->rst)
2196                 return;
2197   
2198         /*
2199          * We need to grab some memory, and put together an RST,
2200          * and then put it into the queue to be sent.
2201          */
2202 
2203         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
2204         if (buff == NULL) 
2205                 return;
2206 
2207         buff->len = sizeof(*t1);
2208         buff->sk = NULL;
2209         buff->dev = dev;
2210         buff->localroute = 0;
2211 
2212         t1 =(struct tcphdr *) buff->data;
2213 
2214         /*
2215          *      Put in the IP header and routing stuff. 
2216          */
2217 
2218         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
2219                            sizeof(struct tcphdr),tos,ttl);
2220         if (tmp < 0) 
2221         {
2222                 buff->free = 1;
2223                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
2224                 return;
2225         }
2226 
2227         t1 =(struct tcphdr *)((char *)t1 +tmp);
2228         buff->len += tmp;
2229         memcpy(t1, th, sizeof(*t1));
2230 
2231         /*
2232          *      Swap the send and the receive. 
2233          */
2234 
2235         t1->dest = th->source;
2236         t1->source = th->dest;
2237         t1->rst = 1;  
2238         t1->window = 0;
2239   
2240         if(th->ack)
2241         {
2242                 t1->ack = 0;
2243                 t1->seq = th->ack_seq;
2244                 t1->ack_seq = 0;
2245         }
2246         else
2247         {
2248                 t1->ack = 1;
2249                 if(!th->syn)
2250                         t1->ack_seq=htonl(th->seq);
2251                 else
2252                         t1->ack_seq=htonl(th->seq+1);
2253                 t1->seq=0;
2254         }
2255 
2256         t1->syn = 0;
2257         t1->urg = 0;
2258         t1->fin = 0;
2259         t1->psh = 0;
2260         t1->doff = sizeof(*t1)/4;
2261         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
2262         prot->queue_xmit(NULL, ndev, buff, 1);
2263         tcp_statistics.TcpOutSegs++;
2264 }
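
/*
 *      Worked example of the two branches above (the usual RFC 793 reset
 *      rules): a stray SYN carrying seq = S and no ACK gets back an RST
 *      with ack = 1, ack_seq = S + 1 and seq = 0, while a stray segment
 *      that does carry an ACK with ack_seq = A gets back an RST with
 *      seq = A and the ACK bit clear.
 */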
2265 
2266 
2267 /*
2268  *      Look for tcp options. Parses everything but only knows about MSS.
2269  *      This routine is always called with the packet containing the SYN.
2270  *      However it may also be called with the ack to the SYN.  So you
2271  *      can't assume this is always the SYN.  It's always called after
2272  *      we have set up sk->mtu to our own MTU.
2273  *
2274  *      We need at minimum to add PAWS support here. Possibly large windows
2275  *      as Linux gets deployed on 100Mb/sec networks.
2276  */
2277  
2278 static void tcp_options(struct sock *sk, struct tcphdr *th)
2279 {
2280         unsigned char *ptr;
2281         int length=(th->doff*4)-sizeof(struct tcphdr);
2282         int mss_seen = 0;
2283     
2284         ptr = (unsigned char *)(th + 1);
2285   
2286         while(length>0)
2287         {
2288                 int opcode=*ptr++;
2289                 int opsize=*ptr++;
2290                 switch(opcode)
2291                 {
2292                         case TCPOPT_EOL:
2293                                 return;
2294                         case TCPOPT_NOP:
2295                                 length-=2;
2296                                 continue;
2297                         
2298                         default:
2299                                 if(opsize<=2)   /* Avoid silly options looping forever */
2300                                         return;
2301                                 switch(opcode)
2302                                 {
2303                                         case TCPOPT_MSS:
2304                                                 if(opsize==4 && th->syn)
2305                                                 {
2306                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
2307                                                         mss_seen = 1;
2308                                                 }
2309                                                 break;
2310                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
2311                                 }
2312                                 ptr+=opsize-2;
2313                                 length-=opsize;
2314                 }
2315         }
2316         if (th->syn) 
2317         {
2318                 if (! mss_seen)
2319                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
2320         }
2321 #ifdef CONFIG_INET_PCTCP
2322         sk->mss = min(sk->max_window >> 1, sk->mtu);
2323 #else    
2324         sk->mss = min(sk->max_window, sk->mtu);
2325 #endif  
2326 }
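
/*
 *      Minimal standalone sketch (illustrative only) of parsing the same
 *      option format tcp_options() walks above.  On the wire an MSS option
 *      is four bytes: kind = 2, length = 4, then a 16-bit value in network
 *      order; EOL (0) and NOP (1) are single bytes with no length field.
 *      find_mss() is an example name only and is not used by this file.
 */
#if 0
#include <arpa/inet.h>

/* Return the advertised MSS from a block of TCP options, or -1. */
static int find_mss(const unsigned char *opt, int length)
{
        while (length > 0) {
                int kind = *opt++;
                int size;

                if (kind == 0)                          /* TCPOPT_EOL */
                        return -1;
                if (kind == 1) {                        /* TCPOPT_NOP */
                        length--;
                        continue;
                }
                size = *opt++;
                if (size <= 2 || size > length)         /* malformed */
                        return -1;
                if (kind == 2 && size == 4)             /* TCPOPT_MSS */
                        return ntohs(*(const unsigned short *)opt);
                opt += size - 2;
                length -= size;
        }
        return -1;
}
#endif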
2327 
2328 static inline unsigned long default_mask(unsigned long dst)
2329 {
2330         dst = ntohl(dst);
2331         if (IN_CLASSA(dst))
2332                 return htonl(IN_CLASSA_NET);
2333         if (IN_CLASSB(dst))
2334                 return htonl(IN_CLASSB_NET);
2335         return htonl(IN_CLASSC_NET);
2336 }
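
/*
 *      Illustrative user-space sketch -- not part of this file.  It prints
 *      the classful defaults default_mask() above hands back: class A
 *      networks get 255.0.0.0, class B 255.255.0.0 and class C
 *      255.255.255.0.  The IN_CLASSx macros come from <netinet/in.h>;
 *      the addresses are examples only.
 */
#if 0
#include <stdio.h>
#include <arpa/inet.h>
#include <netinet/in.h>

int main(void)
{
        const char *addrs[] = { "10.1.2.3", "172.16.5.6", "192.0.2.7" };
        int i;

        for (i = 0; i < 3; i++) {
                /* IN_CLASSA()/IN_CLASSB() expect host byte order. */
                unsigned long a = ntohl(inet_addr(addrs[i]));
                unsigned long mask;

                if (IN_CLASSA(a))
                        mask = IN_CLASSA_NET;           /* 255.0.0.0 */
                else if (IN_CLASSB(a))
                        mask = IN_CLASSB_NET;           /* 255.255.0.0 */
                else
                        mask = IN_CLASSC_NET;           /* 255.255.255.0 */
                printf("%-12s -> mask 0x%08lx\n", addrs[i], mask);
        }
        return 0;
}
#endif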
2337 
2338 /*
2339  *      Default sequence number picking algorithm.
2340  */
2341 
2342 extern inline long tcp_init_seq(void)
2343 {
2344         return jiffies * SEQ_TICK - seq_offset; 
2345 }
2346 
2347 /*
2348  *      This routine handles a connection request.
2349  *      It should make sure we haven't already responded.
2350  *      Because of the way BSD works, we have to send a syn/ack now.
2351  *      This also means it will be harder to close a socket which is
2352  *      listening.
2353  */
2354  
2355 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
2356                  unsigned long daddr, unsigned long saddr,
2357                  struct options *opt, struct device *dev, unsigned long seq)
2358 {
2359         struct sk_buff *buff;
2360         struct tcphdr *t1;
2361         unsigned char *ptr;
2362         struct sock *newsk;
2363         struct tcphdr *th;
2364         struct device *ndev=NULL;
2365         int tmp;
2366         struct rtable *rt;
2367   
2368         th = skb->h.th;
2369 
2370         /* If the socket is dead, don't accept the connection. */
2371         if (!sk->dead) 
2372         {
2373                 sk->data_ready(sk,0);
2374         }
2375         else 
2376         {
2377                 if(sk->debug)
2378                         printk("Reset on %p: Connect on dead socket.\n",sk);
2379                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
2380                 tcp_statistics.TcpAttemptFails++;
2381                 kfree_skb(skb, FREE_READ);
2382                 return;
2383         }
2384 
2385         /*
2386          * Make sure we can accept more.  This will prevent a
2387          * flurry of syns from eating up all our memory.
2388          */
2389 
2390         if (sk->ack_backlog >= sk->max_ack_backlog) 
2391         {
2392                 tcp_statistics.TcpAttemptFails++;
2393                 kfree_skb(skb, FREE_READ);
2394                 return;
2395         }
2396 
2397         /*
2398          * We need to build a new sock struct.
2399          * It is sort of bad to have a socket without an inode attached
2400          * to it, but the wake_up's will just wake up the listening socket,
2401          * and if the listening socket is destroyed before this is taken
2402          * off of the queue, this will take care of it.
2403          */
2404 
2405         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
2406         if (newsk == NULL) 
2407         {
2408                 /* just ignore the syn.  It will get retransmitted. */
2409                 tcp_statistics.TcpAttemptFails++;
2410                 kfree_skb(skb, FREE_READ);
2411                 return;
2412         }
2413 
2414         memcpy(newsk, sk, sizeof(*newsk));
2415         skb_queue_head_init(&newsk->write_queue);
2416         skb_queue_head_init(&newsk->receive_queue);
2417         newsk->send_head = NULL;
2418         newsk->send_tail = NULL;
2419         skb_queue_head_init(&newsk->back_log);
2420         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2421         newsk->rto = TCP_TIMEOUT_INIT;
2422         newsk->mdev = 0;
2423         newsk->max_window = 0;
2424         newsk->cong_window = 1;
2425         newsk->cong_count = 0;
2426         newsk->ssthresh = 0;
2427         newsk->backoff = 0;
2428         newsk->blog = 0;
2429         newsk->intr = 0;
2430         newsk->proc = 0;
2431         newsk->done = 0;
2432         newsk->partial = NULL;
2433         newsk->pair = NULL;
2434         newsk->wmem_alloc = 0;
2435         newsk->rmem_alloc = 0;
2436         newsk->localroute = sk->localroute;
2437 
2438         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2439 
2440         newsk->err = 0;
2441         newsk->shutdown = 0;
2442         newsk->ack_backlog = 0;
2443         newsk->acked_seq = skb->h.th->seq+1;
2444         newsk->copied_seq = skb->h.th->seq+1;
2445         newsk->fin_seq = skb->h.th->seq;
2446         newsk->state = TCP_SYN_RECV;
2447         newsk->timeout = 0;
2448         newsk->ip_xmit_timeout = 0;
2449         newsk->write_seq = seq; 
2450         newsk->window_seq = newsk->write_seq;
2451         newsk->rcv_ack_seq = newsk->write_seq;
2452         newsk->urg_data = 0;
2453         newsk->retransmits = 0;
2454         newsk->linger=0;
2455         newsk->destroy = 0;
2456         init_timer(&newsk->timer);
2457         init_timer(&newsk->retransmit_timer);
2458         newsk->timer.data = (unsigned long)newsk;
2459         newsk->timer.function = &net_timer;
2460         newsk->retransmit_timer.data = (unsigned long)newsk;
2461         newsk->retransmit_timer.function=&retransmit_timer;
2462         newsk->dummy_th.source = skb->h.th->dest;
2463         newsk->dummy_th.dest = skb->h.th->source;
2464         
2465         /*
2466          *      Swap these two, they are from our point of view. 
2467          */
2468          
2469         newsk->daddr = saddr;
2470         newsk->saddr = daddr;
2471 
2472         put_sock(newsk->num,newsk);
2473         newsk->dummy_th.res1 = 0;
2474         newsk->dummy_th.doff = 6;
2475         newsk->dummy_th.fin = 0;
2476         newsk->dummy_th.syn = 0;
2477         newsk->dummy_th.rst = 0;        
2478         newsk->dummy_th.psh = 0;
2479         newsk->dummy_th.ack = 0;
2480         newsk->dummy_th.urg = 0;
2481         newsk->dummy_th.res2 = 0;
2482         newsk->acked_seq = skb->h.th->seq + 1;
2483         newsk->copied_seq = skb->h.th->seq + 1;
2484         newsk->socket = NULL;
2485 
2486         /*
2487          *      Grab the ttl and tos values and use them 
2488          */
2489 
2490         newsk->ip_ttl=sk->ip_ttl;
2491         newsk->ip_tos=skb->ip_hdr->tos;
2492 
2493         /*
2494          *      Use 512 or whatever user asked for 
2495          */
2496 
2497         /*
2498          *      Note use of sk->user_mss, since user has no direct access to newsk 
2499          */
2500 
2501         rt=ip_rt_route(saddr, NULL,NULL);
2502         
2503         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2504                 newsk->window_clamp = rt->rt_window;
2505         else
2506                 newsk->window_clamp = 0;
2507                 
2508         if (sk->user_mss)
2509                 newsk->mtu = sk->user_mss;
2510         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2511                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2512         else 
2513         {
2514 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2515                 if ((saddr ^ daddr) & default_mask(saddr))
2516 #else
2517                 if ((saddr ^ daddr) & dev->pa_mask)
2518 #endif
2519                         newsk->mtu = 576 - HEADER_SIZE;
2520                 else
2521                         newsk->mtu = MAX_WINDOW;
2522         }
2523 
2524         /*
2525          *      But not bigger than device MTU 
2526          */
2527 
2528         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2529 
2530         /*
2531          *      This will min with what arrived in the packet 
2532          */
2533 
2534         tcp_options(newsk,skb->h.th);
2535 
2536         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2537         if (buff == NULL) 
2538         {
2539                 sk->err = -ENOMEM;
2540                 newsk->dead = 1;
2541                 release_sock(newsk);
2542                 kfree_skb(skb, FREE_READ);
2543                 tcp_statistics.TcpAttemptFails++;
2544                 return;
2545         }
2546   
2547         buff->len = sizeof(struct tcphdr)+4;
2548         buff->sk = newsk;
2549         buff->localroute = newsk->localroute;
2550 
2551         t1 =(struct tcphdr *) buff->data;
2552 
2553         /*
2554          *      Put in the IP header and routing stuff. 
2555          */
2556 
2557         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2558                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2559 
2560         /*
2561          *      Something went wrong. 
2562          */
2563 
2564         if (tmp < 0) 
2565         {
2566                 sk->err = tmp;
2567                 buff->free = 1;
2568                 kfree_skb(buff,FREE_WRITE);
2569                 newsk->dead = 1;
2570                 release_sock(newsk);
2571                 skb->sk = sk;
2572                 kfree_skb(skb, FREE_READ);
2573                 tcp_statistics.TcpAttemptFails++;
2574                 return;
2575         }
2576 
2577         buff->len += tmp;
2578         t1 =(struct tcphdr *)((char *)t1 +tmp);
2579   
2580         memcpy(t1, skb->h.th, sizeof(*t1));
2581         buff->h.seq = newsk->write_seq;
2582         /*
2583          *      Swap the send and the receive. 
2584          */
2585         t1->dest = skb->h.th->source;
2586         t1->source = newsk->dummy_th.source;
2587         t1->seq = ntohl(newsk->write_seq++);
2588         t1->ack = 1;
2589         newsk->window = tcp_select_window(newsk);
2590         newsk->sent_seq = newsk->write_seq;
2591         t1->window = ntohs(newsk->window);
2592         t1->res1 = 0;
2593         t1->res2 = 0;
2594         t1->rst = 0;
2595         t1->urg = 0;
2596         t1->psh = 0;
2597         t1->syn = 1;
2598         t1->ack_seq = ntohl(skb->h.th->seq+1);
2599         t1->doff = sizeof(*t1)/4+1;
2600         ptr =(unsigned char *)(t1+1);
2601         ptr[0] = 2;
2602         ptr[1] = 4;
2603         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2604         ptr[3] =(newsk->mtu) & 0xff;
2605 
2606         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2607         newsk->prot->queue_xmit(newsk, ndev, buff, 0);
2608         reset_xmit_timer(newsk, TIME_WRITE, newsk->rto);
2609 
2610         reset_xmit_timer(newsk, TIME_WRITE , TCP_TIMEOUT_INIT);
2611         skb->sk = newsk;
2612 
2613         /*
2614          *      Charge the sock_buff to newsk. 
2615          */
2616          
2617         sk->rmem_alloc -= skb->mem_len;
2618         newsk->rmem_alloc += skb->mem_len;
2619         
2620         skb_queue_tail(&sk->receive_queue,skb);
2621         sk->ack_backlog++;
2622         release_sock(newsk);
2623         tcp_statistics.TcpOutSegs++;
2624 }
2625 
2626 
2627 static void tcp_close(struct sock *sk, int timeout)
2628 {
2629         struct sk_buff *buff;
2630         struct tcphdr *t1, *th;
2631         struct proto *prot;
2632         struct device *dev=NULL;
2633         int tmp;
2634 
2635         /*
2636          * We need to grab some memory, and put together a FIN, 
2637          * and then put it into the queue to be sent.
2638          */
2639         sk->inuse = 1;
2640         sk->keepopen = 1;
2641         sk->shutdown = SHUTDOWN_MASK;
2642 
2643         if (!sk->dead) 
2644                 sk->state_change(sk);
2645 
2646         if (timeout == 0) 
2647         {
2648                 /*
2649                  *  We need to flush the recv. buffs.  We do this only on the
2650                  *  descriptor close, not protocol-sourced closes, because the
2651                  *  reader process may not have drained the data yet!
2652                  */
2653 
2654                 if (skb_peek(&sk->receive_queue) != NULL) 
2655                 {
2656                         struct sk_buff *skb;
2657                         if(sk->debug)
2658                                 printk("Clean rcv queue\n");
2659                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2660                                 kfree_skb(skb, FREE_READ);
2661                         if(sk->debug)
2662                                 printk("Cleaned.\n");
2663                 }
2664         }
2665 
2666         /*
2667          *      Get rid of any half-completed packets. 
2668          */
2669          
2670         if (sk->partial) 
2671         {
2672                 tcp_send_partial(sk);
2673         }
2674 
2675         switch(sk->state) 
2676         {
2677                 case TCP_FIN_WAIT1:
2678                 case TCP_FIN_WAIT2:
2679                 case TCP_CLOSING:
2680                         /*
2681                          * These states occur when we have already closed out
2682                          * our end.  If there is no timeout, we do not do
2683                          * anything.  We may still be in the middle of sending
2684                          * the remainder of our buffer, for example...
2685                          * resetting the timer would be inappropriate.
2686                          *
2687                          * XXX if retransmit count reaches limit, is tcp_close()
2688                          * called with timeout == 1 ? if not, we need to fix that.
2689                          */
2690                         if (!timeout) {
2691                                 int timer_active;
2692 
2693                                 timer_active = del_timer(&sk->timer);
2694                                 if (timer_active)
2695                                         add_timer(&sk->timer);
2696                                 else
2697                                         reset_msl_timer(sk, TIME_CLOSE, 4 * sk->rto);
2698                         }
2699                         if (timeout) 
2700                                 tcp_time_wait(sk);
2701                         release_sock(sk);
2702                         return; /* break causes a double release - messy */
2703                 case TCP_TIME_WAIT:
2704                 case TCP_LAST_ACK:
2705                         /*
2706                          * A timeout from these states terminates the TCB.
2707                          */
2708                         if (timeout) 
2709                         {
2710                                 tcp_set_state(sk,TCP_CLOSE);
2711                         }
2712                         release_sock(sk);
2713                         return;
2714                 case TCP_LISTEN:
2715                         /* we need to drop any sockets which have been connected,
2716                            but have not yet been accepted. */
2717                         tcp_set_state(sk,TCP_CLOSE);
2718                         tcp_close_pending(sk, timeout);
2719                         release_sock(sk);
2720                         return;
2721                 case TCP_CLOSE:
2722                         release_sock(sk);
2723                         return;
2724                 case TCP_CLOSE_WAIT:
2725                 case TCP_ESTABLISHED:
2726                 case TCP_SYN_SENT:
2727                 case TCP_SYN_RECV:
2728                         prot =(struct proto *)sk->prot;
2729                         th =(struct tcphdr *)&sk->dummy_th;
2730                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2731                         if (buff == NULL) 
2732                         {
2733                                 /* This will force it to try again later. */
2734                                 /* Or it would have if someone released the socket
2735                                    first. Anyway it might work now */
2736                                 release_sock(sk);
2737                                 if (sk->state != TCP_CLOSE_WAIT)
2738                                         tcp_set_state(sk,TCP_ESTABLISHED);
2739                                 reset_msl_timer(sk, TIME_CLOSE, 100);
2740                                 return;
2741                         }
2742                         buff->sk = sk;
2743                         buff->free = 0;
2744                         buff->len = sizeof(*t1);
2745                         buff->localroute = sk->localroute;
2746                         t1 =(struct tcphdr *) buff->data;
2747         
2748                         /*
2749                          *      Put in the IP header and routing stuff. 
2750                          */
2751                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2752                                          IPPROTO_TCP, sk->opt,
2753                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2754                         if (tmp < 0) 
2755                         {
2756                                 sk->write_seq++;        /* Very important 8) */
2757                                 kfree_skb(buff,FREE_WRITE);
2758 
2759                                 /*
2760                                  * Enter FIN_WAIT1 to await completion of
2761                                  * written out data and ACK to our FIN.
2762                                  */
2763 
2764                                 if(sk->state==TCP_ESTABLISHED)
2765                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2766                                 else
2767                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2768                                 reset_msl_timer(sk, TIME_CLOSE,4*sk->rto);
2769                                 if(timeout)
2770                                         tcp_time_wait(sk);
2771 
2772                                 release_sock(sk);
2773                                 return;
2774                         }
2775 
2776                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2777                         buff->len += tmp;
2778                         buff->dev = dev;
2779                         memcpy(t1, th, sizeof(*t1));
2780                         t1->seq = ntohl(sk->write_seq);
2781                         sk->write_seq++;
2782                         buff->h.seq = sk->write_seq;
2783                         t1->ack = 1;
2784         
2785                         /* 
2786                          *      Ack everything immediately from now on. 
2787                          */
2788 
2789                         sk->delay_acks = 0;
2790                         t1->ack_seq = ntohl(sk->acked_seq);
2791                         t1->window = ntohs(sk->window=tcp_select_window(sk));
2792                         t1->fin = 1;
2793                         t1->rst = 0;
2794                         t1->doff = sizeof(*t1)/4;
2795                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2796 
2797                         tcp_statistics.TcpOutSegs++;
2798 
2799                         if (skb_peek(&sk->write_queue) == NULL) 
2800                         {
2801                                 sk->sent_seq = sk->write_seq;
2802                                 prot->queue_xmit(sk, dev, buff, 0);
2803                                 reset_xmit_timer(sk, TIME_WRITE, sk->rto);
2804                         } 
2805                         else 
2806                         {
2807                                 reset_xmit_timer(sk, TIME_WRITE, sk->rto);
2808                                 if (buff->next != NULL) 
2809                                 {
2810                                         printk("tcp_close: next != NULL\n");
2811                                         skb_unlink(buff);
2812                                 }
2813                                 skb_queue_tail(&sk->write_queue, buff);
2814                         }
2815 
2816                         /*
2817                          * If established (normal close), enter FIN_WAIT1.
2818                          * If in CLOSE_WAIT, enter LAST_ACK
2819                          * If in CLOSING, remain in CLOSING
2820                          * otherwise enter FIN_WAIT2
2821                          */
2822 
2823                         if (sk->state == TCP_ESTABLISHED)
2824                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2825                         else if (sk->state == TCP_CLOSE_WAIT)
2826                                 tcp_set_state(sk,TCP_LAST_ACK);
2827                         else if (sk->state != TCP_CLOSING)
2828                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2829         }
2830         release_sock(sk);
2831 }
2832 
2833 
2834 /*
2835  * This routine takes stuff off of the write queue,
2836  * and puts it in the xmit queue.
2837  */
2838 static void
2839 tcp_write_xmit(struct sock *sk)
2840 {
2841         struct sk_buff *skb;
2842 
2843         /*
2844          *      The bytes will have to remain here. In time closedown will
2845          *      empty the write queue and all will be happy 
2846          */
2847 
2848         if(sk->zapped)
2849                 return;
2850 
2851         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2852                 before(skb->h.seq, sk->window_seq + 1) &&
2853                 (sk->retransmits == 0 ||
2854                  sk->ip_xmit_timeout != TIME_WRITE ||
2855                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2856                 && sk->packets_out < sk->cong_window) 
2857         {
2858                 IS_SKB(skb);
2859                 skb_unlink(skb);
2860                 /* See if we really need to send the packet. */
2861                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2862                 {
2863                         sk->retransmits = 0;
2864                         kfree_skb(skb, FREE_WRITE);
2865                         if (!sk->dead) 
2866                                 sk->write_space(sk);
2867                 } 
2868                 else
2869                 {
2870                         struct tcphdr *th;
2871                         struct iphdr *iph;
2872                         int size;
2873 /*
2874  * put in the ack seq and window at this point rather than earlier,
2875  * in order to keep them monotonic.  We really want to avoid taking
2876  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2877  * Ack and window will in general have changed since this packet was put
2878  * on the write queue.
2879  */
2880                         iph = (struct iphdr *)(skb->data +
2881                                                skb->dev->hard_header_len);
2882                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2883                         size = skb->len - (((unsigned char *) th) - skb->data);
2884                         
2885                         th->ack_seq = ntohl(sk->acked_seq);
2886                         th->window = ntohs(tcp_select_window(sk));
2887 
2888                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2889 
2890                         sk->sent_seq = skb->h.seq;
2891                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2892                         reset_xmit_timer(sk, TIME_WRITE, sk->rto);
2893                 }
2894         }
2895 }
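
     /*
      * Illustrative restatement of the send gate in the loop above (added as
      * a sketch, not part of the original source; "can_send" is only a name
      * for this note, everything else is the sk fields the loop really uses):
      *
      *      can_send(skb) =
      *              before(skb->h.seq, sk->window_seq + 1)
      *           && (sk->retransmits == 0
      *               || sk->ip_xmit_timeout != TIME_WRITE
      *               || before(skb->h.seq, sk->rcv_ack_seq + 1))
      *           && sk->packets_out < sk->cong_window
      *
      * i.e. the segment must fit the advertised window, must not be new data
      * pushed into the middle of a retransmission run, and must respect the
      * congestion window.  Segments already covered by rcv_ack_seq are freed
      * instead of transmitted.
      */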
2896 
2897 
2898 /*
2899  *      This routine deals with incoming acks, but not outgoing ones.
2900  */
2901 
2902 extern __inline__ int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
2903 {
2904         unsigned long ack;
2905         int flag = 0;
2906 
2907         /* 
2908          * 1 - there was data in packet as well as ack or new data is sent or 
2909          *     in shutdown state
2910          * 2 - data from retransmit queue was acked and removed
2911          * 4 - window shrunk or data from retransmit queue was acked and removed
2912          */
2913 
2914         if(sk->zapped)
2915                 return(1);      /* Dead, can't ack any more so why bother */
2916 
2917         ack = ntohl(th->ack_seq);
2918         if (ntohs(th->window) > sk->max_window) 
2919         {
2920                 sk->max_window = ntohs(th->window);
2921 #ifdef CONFIG_INET_PCTCP
2922                 sk->mss = min(sk->max_window>>1, sk->mtu);
2923 #else
2924                 sk->mss = min(sk->max_window, sk->mtu);
2925 #endif  
2926         }
2927 
2928         if (sk->retransmits && sk->ip_xmit_timeout == TIME_KEEPOPEN)
2929                 sk->retransmits = 0;
2930 
2931         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2932         {
2933                 if(sk->debug)
2934                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2935                         
2936                 /*
2937                  *      Keepalive processing.
2938                  */
2939                  
2940                 if (after(ack, sk->sent_seq)) 
2941                 {
2942                         return(0);
2943                 }
2944                 if (sk->keepopen) 
2945                 {
2946                         if(sk->ip_xmit_timeout==TIME_KEEPOPEN)
2947                                 reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2948                 }
2949                 return(1);
2950         }
2951 
2952         if (len != th->doff*4) 
2953                 flag |= 1;
2954 
2955         /* See if our window has been shrunk. */
2956 
2957         if (after(sk->window_seq, ack+ntohs(th->window))) 
2958         {
2959                 /*
2960                  * We may need to move packets from the send queue
2961                  * to the write queue, if the window has been shrunk on us.
2962                  * The RFC says you are not allowed to shrink your window
2963                  * like this, but if the other end does, you must be able
2964                  * to deal with it.
2965                  */
2966                 struct sk_buff *skb;
2967                 struct sk_buff *skb2;
2968                 struct sk_buff *wskb = NULL;
2969         
2970                 skb2 = sk->send_head;
2971                 sk->send_head = NULL;
2972                 sk->send_tail = NULL;
2973         
2974                 /*
2975                  *      This is an artifact of a flawed concept. We want one
2976                  *      queue and a smarter send routine when we send all.
2977                  */
2978         
2979                 flag |= 4;
2980         
2981                 sk->window_seq = ack + ntohs(th->window);
2982                 cli();
2983                 while (skb2 != NULL) 
2984                 {
2985                         skb = skb2;
2986                         skb2 = skb->link3;
2987                         skb->link3 = NULL;
2988                         if (after(skb->h.seq, sk->window_seq)) 
2989                         {
2990                                 if (sk->packets_out > 0) 
2991                                         sk->packets_out--;
2992                                 /* We may need to remove this from the dev send list. */
2993                                 if (skb->next != NULL) 
2994                                 {
2995                                         skb_unlink(skb);                                
2996                                 }
2997                                 /* Now add it to the write_queue. */
2998                                 if (wskb == NULL)
2999                                         skb_queue_head(&sk->write_queue,skb);
3000                                 else
3001                                         skb_append(wskb,skb);
3002                                 wskb = skb;
3003                         } 
3004                         else 
3005                         {
3006                                 if (sk->send_head == NULL) 
3007                                 {
3008                                         sk->send_head = skb;
3009                                         sk->send_tail = skb;
3010                                 }
3011                                 else
3012                                 {
3013                                         sk->send_tail->link3 = skb;
3014                                         sk->send_tail = skb;
3015                                 }
3016                                 skb->link3 = NULL;
3017                         }
3018                 }
3019                 sti();
3020         }
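
             /*
              *     Worked example of the shrink handling above (illustrative
              *     numbers, not from the original source): if the retransmit
              *     list holds segments ending at h.seq 2000, 3000 and 4000 and
              *     the peer acks 1500 while advertising a 2000 byte window,
              *     window_seq becomes 3500.  2000 and 3000 stay on
              *     send_head/send_tail, while 4000 is unlinked, packets_out is
              *     decremented and the segment goes back onto the front of
              *     write_queue (in order, via wskb) to be re-sent by
              *     tcp_write_xmit() once the window reopens.
              */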
3021 
3022         /*
3023          *      Pipe has emptied
3024          */
3025          
3026         if (sk->send_tail == NULL || sk->send_head == NULL) 
3027         {
3028                 sk->send_head = NULL;
3029                 sk->send_tail = NULL;
3030                 sk->packets_out= 0;
3031         }
3032 
3033         sk->window_seq = ack + ntohs(th->window);
3034 
3035         /* We don't want too many packets out there. */
3036         if (sk->ip_xmit_timeout == TIME_WRITE && 
3037                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
3038         {
3039                 /* 
3040                  * This is Jacobson's slow start and congestion avoidance. 
3041                  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
3042                  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
3043                  * counter and increment it once every cwnd times.  It's possible
3044                  * that this should be done only if sk->retransmits == 0.  I'm
3045                  * interpreting "new data is acked" as including data that has
3046                  * been retransmitted but is just now being acked.
3047                  */
3048                 if (sk->cong_window < sk->ssthresh)  
3049                         /* 
3050                          *      In "safe" area, increase
3051                          */
3052                         sk->cong_window++;
3053                 else 
3054                 {
3055                         /*
3056                          *      In dangerous area, increase slowly.  In theory this is
3057                          *      sk->cong_window += 1 / sk->cong_window
3058                          */
3059                         if (sk->cong_count >= sk->cong_window) 
3060                         {
3061                                 sk->cong_window++;
3062                                 sk->cong_count = 0;
3063                         }
3064                         else 
3065                                 sk->cong_count++;
3066                 }
3067         }
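
             /*
              *     Illustrative trace of the code above (example values, not
              *     from the original source), assuming ssthresh = 4,
              *     cong_window = 1 and each ack acknowledging new data while
              *     the write timer is running:
              *
              *     first 3 acks : cong_window < ssthresh, so it grows
              *                    1 -> 2 -> 3 -> 4 (slow start, roughly
              *                    doubling per round trip).
              *     later acks   : cong_window >= ssthresh, so cong_count is
              *                    bumped and cong_window only grows by one
              *                    once cong_count reaches it - about one
              *                    segment per round trip (congestion
              *                    avoidance).
              */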
3068 
3069         sk->rcv_ack_seq = ack;
3070 
3071         /*
3072          *      If this ack opens up a zero window, clear backoff.  It was
3073          *      being used to time the probes, and is probably far higher than
3074          *      it needs to be for normal retransmission.
3075          */
3076 
3077         if (sk->ip_xmit_timeout == TIME_PROBE0) 
3078         {
3079                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
3080                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
3081                 {
3082                         sk->retransmits = 0;
3083                         sk->backoff = 0;
3084                         
3085                         /*
3086                          *      Recompute rto from rtt.  this eliminates any backoff.
3087                          */
3088 
3089                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
3090                         if (sk->rto > 120*HZ)
3091                                 sk->rto = 120*HZ;
3092                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
3093                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
3094                                                    .2 of a second is going to need huge windows (SIGH) */
3095                                 sk->rto = 20;
3096                 }
3097         }
3098 
3099         /* 
3100          *      See if we can take anything off of the retransmit queue.
3101          */
3102    
3103         while(sk->send_head != NULL) 
3104         {
3105                 /* Check for a bug. */
3106                 if (sk->send_head->link3 &&
3107                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
3108                         printk("INET: tcp.c: *** bug send_list out of order.\n");
3109                 if (before(sk->send_head->h.seq, ack+1)) 
3110                 {
3111                         struct sk_buff *oskb;   
3112                         if (sk->retransmits) 
3113                         {       
3114                                 /*
3115                                  *      We were retransmitting.  don't count this in RTT est 
3116                                  */
3117                                 flag |= 2;
3118 
3119                                 /*
3120                                  * even though we've gotten an ack, we're still
3121                                  * retransmitting as long as we're sending from
3122                                  * the retransmit queue.  Keeping retransmits non-zero
3123                                  * prevents us from getting new data interspersed with
3124                                  * retransmissions.
3125                                  */
3126 
3127                                 if (sk->send_head->link3)
3128                                         sk->retransmits = 1;
3129                                 else
3130                                         sk->retransmits = 0;
3131                         }
3132                         /*
3133                          * Note that we only reset backoff and rto in the
3134                          * rtt recomputation code.  And that doesn't happen
3135                          * if there were retransmissions in effect.  So the
3136                          * first new packet after the retransmissions is
3137                          * sent with the backoff still in effect.  Not until
3138                          * we get an ack from a non-retransmitted packet do
3139                          * we reset the backoff and rto.  This allows us to deal
3140                          * with a situation where the network delay has increased
3141                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
3142                          */
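
                             /*
                              *     Concrete illustration of Karn's rule as
                              *     coded above (example only, not from the
                              *     original source): if segments A, B and C
                              *     were all retransmitted and the ack for A
                              *     arrives, flag gets bit 2 so A's RTT is not
                              *     sampled, and retransmits stays non-zero
                              *     while B and C remain queued, so the
                              *     backed-off rto is kept until an ack covers
                              *     a segment that was sent only once.
                              */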
3143 
3144                         /*
3145                          *      We have one less packet out there. 
3146                          */
3147                          
3148                         if (sk->packets_out > 0) 
3149                                 sk->packets_out --;
3150                         /* 
3151                          *      Wake up the process, it can probably write more. 
3152                          */
3153                         if (!sk->dead) 
3154                                 sk->write_space(sk);
3155                         oskb = sk->send_head;
3156 
3157                         if (!(flag&2)) 
3158                         {
3159                                 long m;
3160         
3161                                 /*
3162                                  *      The following amusing code comes from Jacobson's
3163                                  *      article in SIGCOMM '88.  Note that rtt and mdev
3164                                  *      are scaled versions of rtt and mean deviation.
3165                                  *      This is designed to be as fast as possible 
3166                                  *      m stands for "measurement".
3167                                  */
3168         
3169                                 m = jiffies - oskb->when;  /* RTT */
3170                                 if(m<=0)
3171                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
3172                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
3173                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
3174                                 if (m < 0)
3175                                         m = -m;         /* m is now abs(error) */
3176                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
3177                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
3178         
3179                                 /*
3180                                  *      Now update timeout.  Note that this removes any backoff.
3181                                  */
3182                          
3183                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
3184                                 if (sk->rto > 120*HZ)
3185                                         sk->rto = 120*HZ;
3186                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
3187                                         sk->rto = 20;
3188                                 sk->backoff = 0;
3189                         }
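
                             /*
                              *     Worked example of the scaled estimator
                              *     above (illustrative numbers only): sk->rtt
                              *     holds 8*srtt and sk->mdev holds 4*mdev, so
                              *     with sk->rtt = 800 (srtt 100 ticks),
                              *     sk->mdev = 40 and a fresh measurement
                              *     m = 120:
                              *
                              *         m -= 800 >> 3;   ->  m = 20  (error)
                              *         sk->rtt += m;    ->  rtt = 820
                              *         m -= 40 >> 2;    ->  m = 10
                              *         sk->mdev += m;   ->  mdev = 50
                              *         rto = ((820 >> 2) + 50) >> 1 = 127
                              *
                              *     i.e. roughly srtt + 2*mdev, well inside the
                              *     20..120*HZ clamp applied above.
                              */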
3190                         flag |= (2|4);
3191                         cli();
3192                         oskb = sk->send_head;
3193                         IS_SKB(oskb);
3194                         sk->send_head = oskb->link3;
3195                         if (sk->send_head == NULL) 
3196                         {
3197                                 sk->send_tail = NULL;
3198                         }
3199 
3200                 /*
3201                  *      We may need to remove this from the dev send list. 
3202                  */
3203 
3204                         if (oskb->next)
3205                                 skb_unlink(oskb);
3206                         sti();
3207                         kfree_skb(oskb, FREE_WRITE); /* write. */
3208                         if (!sk->dead) 
3209                                 sk->write_space(sk);
3210                 }
3211                 else
3212                 {
3213                         break;
3214                 }
3215         }
3216 
3217         /*
3218          * XXX someone ought to look at this too.. at the moment, if skb_peek()
3219  * returns non-NULL, we completely ignore the timer stuff in the else
3220  * clause.  We ought to organize the code so that the else clause can
3221          * (should) be executed regardless, possibly moving the PROBE timer
3222          * reset over.  The skb_peek() thing should only move stuff to the
3223          * write queue, NOT also manage the timer functions.
3224          */
3225 
3226         /*
3227          * Maybe we can take some stuff off of the write queue,
3228          * and put it onto the xmit queue.
3229          */
3230         if (skb_peek(&sk->write_queue) != NULL) 
3231         {
3232                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
3233                         (sk->retransmits == 0 || 
3234                          sk->ip_xmit_timeout != TIME_WRITE ||
3235                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
3236                         && sk->packets_out < sk->cong_window) 
3237                 {
3238                         flag |= 1;
3239                         tcp_write_xmit(sk);
3240                 }
3241                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
3242                         sk->send_head == NULL &&
3243                         sk->ack_backlog == 0 &&
3244                         sk->state != TCP_TIME_WAIT) 
3245                 {
3246                         reset_xmit_timer(sk, TIME_PROBE0, sk->rto);
3247                 }               
3248         }
3249         else
3250         {
3251                 /*
3252                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
3253                  * from TCP_CLOSE we don't do anything
3254                  *
3255                  * from anything else, if there is write data (or fin) pending,
3256                  * we use a TIME_WRITE timeout, else if keepalive we reset to
3257                  * a KEEPALIVE timeout, else we delete the timer.
3258                  *
3259                  * We do not set flag for nominal write data, otherwise we may
3260                  * force a state where we start to write itsy bitsy tidbits
3261                  * of data.
3262                  */
3263 
3264                 switch(sk->state) {
3265                 case TCP_TIME_WAIT:
3266                         /*
3267                          * keep us in TIME_WAIT until we stop getting packets,
3268                          * reset the timeout.
3269                          */
3270                         reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3271                         break;
3272                 case TCP_CLOSE:
3273                         /*
3274                          * don't touch the timer.
3275                          */
3276                         break;
3277                 default:
3278                         /*
3279                          * must check send_head, write_queue, and ack_backlog
3280                          * to determine which timeout to use.
3281                          */
3282                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
3283                                 reset_xmit_timer(sk, TIME_WRITE, sk->rto);
3284                         } else if (sk->keepopen) {
3285                                 reset_xmit_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
3286                         } else {
3287                                 del_timer(&sk->retransmit_timer);
3288                                 sk->ip_xmit_timeout = 0;
3289                         }
3290                         break;
3291                 }
3292         }
3293 
3294         if (sk->packets_out == 0 && sk->partial != NULL &&
3295                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
3296         {
3297                 flag |= 1;
3298                 tcp_send_partial(sk);
3299         }
3300 
3301         /*
3302          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
3303          * we are now waiting for an acknowledge to our FIN.  The other end is
3304          * already in TIME_WAIT.
3305          *
3306          * Move to TCP_CLOSE on success.
3307          */
3308 
3309         if (sk->state == TCP_LAST_ACK) 
3310         {
3311                 if (!sk->dead)
3312                         sk->state_change(sk);
3313                 if(sk->debug)
3314                         printk("rcv_ack_seq: %lX==%lX, acked_seq: %lX==%lX\n",
3315                                 sk->rcv_ack_seq,sk->write_seq,sk->acked_seq,sk->fin_seq);
3316                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
3317                 {
3318                         flag |= 1;
3319                         tcp_set_state(sk,TCP_CLOSE);
3320                         sk->shutdown = SHUTDOWN_MASK;
3321                 }
3322         }
3323 
3324         /*
3325          *      Incoming ACK to a FIN we sent in the case of our initiating the close.
3326          *
3327          *      Move to FIN_WAIT2 to await a FIN from the other end. Set
3328          *      SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
3329          */
3330 
3331         if (sk->state == TCP_FIN_WAIT1) 
3332         {
3333 
3334                 if (!sk->dead) 
3335                         sk->state_change(sk);
3336                 if (sk->rcv_ack_seq == sk->write_seq) 
3337                 {
3338                         flag |= 1;
3339                         sk->shutdown |= SEND_SHUTDOWN;
3340                         tcp_set_state(sk, TCP_FIN_WAIT2);
3341                 }
3342         }
3343 
3344         /*
3345          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
3346          *
3347          *      Move to TIME_WAIT
3348          */
3349 
3350         if (sk->state == TCP_CLOSING) 
3351         {
3352 
3353                 if (!sk->dead) 
3354                         sk->state_change(sk);
3355                 if (sk->rcv_ack_seq == sk->write_seq) 
3356                 {
3357                         flag |= 1;
3358                         tcp_time_wait(sk);
3359                 }
3360         }
3361         
3362         /*
3363          *      Final ack of a three way shake 
3364          */
3365          
3366         if(sk->state==TCP_SYN_RECV)
3367         {
3368                 tcp_set_state(sk, TCP_ESTABLISHED);
3369                 tcp_options(sk,th);
3370                 sk->dummy_th.dest=th->source;
3371                 sk->copied_seq = sk->acked_seq;
3372                 if(!sk->dead)
3373                         sk->state_change(sk);
3374                 if(sk->max_window==0)
3375                 {
3376                         sk->max_window=32;
3377                         sk->mss=min(sk->max_window,sk->mtu);
3378                 }
3379         }
3380         
3381         /*
3382          * I make no guarantees about the first clause in the following
3383          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
3384          * what conditions "!flag" would be true.  However I think the rest
3385          * of the conditions would prevent that from causing any
3386          * unnecessary retransmission. 
3387          *   Clearly if the first packet has expired it should be 
3388          * retransmitted.  The other alternative, "flag&2 && retransmits", is
3389          * harder to explain:  You have to look carefully at how and when the
3390          * timer is set and with what timeout.  The most recent transmission always
3391          * sets the timer.  So in general if the most recent thing has timed
3392          * out, everything before it has as well.  So we want to go ahead and
3393          * retransmit some more.  If we didn't explicitly test for this
3394          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
3395          * would not be true.  If you look at the pattern of timing, you can
3396          * show that rto is increased fast enough that the next packet would
3397          * almost never be retransmitted immediately.  Then you'd end up
3398          * waiting for a timeout to send each packet on the retransmission
3399          * queue.  With my implementation of the Karn sampling algorithm,
3400          * the timeout would double each time.  The net result is that it would
3401          * take a hideous amount of time to recover from a single dropped packet.
3402          * It's possible that there should also be a test for TIME_WRITE, but
3403          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3404          * got to be in real retransmission mode.
3405          *   Note that tcp_do_retransmit is called with all==1.  Setting cong_window
3406          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3407          * As long as no further losses occur, this seems reasonable.
3408          */
3409         
3410         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3411                (((flag&2) && sk->retransmits) ||
3412                (sk->send_head->when + sk->rto < jiffies))) 
3413         {
3414                 if(sk->send_head->when + sk->rto < jiffies)
3415                         tcp_retransmit(sk,0);   
3416                 else
3417                 {
3418                         tcp_do_retransmit(sk, 1);
3419                         reset_xmit_timer(sk, TIME_WRITE, sk->rto);
3420                 }
3421         }
3422 
3423         return(1);
3424 }
3425 
3426 
3427 /*
3428  *      Process the FIN bit. This now behaves as it is supposed to:
3429  *      the FIN takes effect only when it is validly part of the sequence
3430  *      space, not earlier while there are still holes before it.
3431  *
3432  *      If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3433  *      (and thence onto LAST-ACK and finally, CLOSE, we never enter
3434  *      TIME-WAIT)
3435  *
3436  *      If we are in FINWAIT-1, a received FIN indicates simultaneous
3437  *      close and we go into CLOSING (and later onto TIME-WAIT)
3438  *
3439  *      If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3440  *
3441  */
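
     /*
      *      Summary of the transitions in the switch below, derived from the
      *      code itself and added here only as a cross-check:
      *
      *              SYN_RECV / SYN_SENT / ESTABLISHED  ->  CLOSE_WAIT
      *              FIN_WAIT1                          ->  CLOSING
      *              FIN_WAIT2                          ->  TIME_WAIT
      *              CLOSE_WAIT / CLOSING / CLOSE       ->  unchanged (duplicate FIN)
      *              TIME_WAIT                          ->  restart the MSL timer
      *              any other state                    ->  LAST_ACK
      */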
3442  
3443 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th)
3444 {
3445         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3446 
3447         if (!sk->dead) 
3448         {
3449                 sk->state_change(sk);
3450                 sock_wake_async(sk->socket, 1);
3451         }
3452 
3453         switch(sk->state) 
3454         {
3455                 case TCP_SYN_RECV:
3456                 case TCP_SYN_SENT:
3457                 case TCP_ESTABLISHED:
3458                         /*
3459                          * move to CLOSE_WAIT, tcp_data() already handled
3460                          * sending the ack.
3461                          */     /* Check me --------------vvvvvvv */
3462                         reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3463                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3464                         if (th->rst)
3465                                 sk->shutdown = SHUTDOWN_MASK;
3466                         break;
3467 
3468                 case TCP_CLOSE_WAIT:
3469                 case TCP_CLOSING:
3470                         /*
3471                          * received a retransmission of the FIN, do
3472                          * nothing.
3473                          */
3474                         break;
3475                 case TCP_TIME_WAIT:
3476                         /*
3477                          * received a retransmission of the FIN,
3478                          * restart the TIME_WAIT timer.
3479                          */
3480                         reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3481                         return(0);
3482                 case TCP_FIN_WAIT1:
3483                         /*
3484                          * This case occurs when a simultaneous close
3485                          * happens, we must ack the received FIN and
3486                          * enter the CLOSING state.
3487                          *
3488                          * This causes a WRITE timeout, which will either
3489                          * move on to TIME_WAIT when we timeout, or resend
3490                          * the FIN properly (maybe we get rid of that annoying
3491                          * FIN lost hang). The TIME_WRITE code is already correct
3492                          * for handling this timeout.
3493                          */
3494 
3495                         if(sk->ip_xmit_timeout != TIME_WRITE)
3496                                 reset_xmit_timer(sk, TIME_WRITE, sk->rto);
3497                         tcp_set_state(sk,TCP_CLOSING);
3498                         break;
3499                 case TCP_FIN_WAIT2:
3500                         /*
3501                          * received a FIN -- send ACK and enter TIME_WAIT
3502                          */
3503                         reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3504                         sk->shutdown|=SHUTDOWN_MASK;
3505                         tcp_set_state(sk,TCP_TIME_WAIT);
3506                         break;
3507                 case TCP_CLOSE:
3508                         /*
3509                          * already in CLOSE
3510                          */
3511                         break;
3512                 default:
3513                         tcp_set_state(sk,TCP_LAST_ACK);
3514         
3515                         /* Start the timers. */
3516                         reset_msl_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3517                         return(0);
3518         }
3519 
3520         return(0);
3521 }
3522 
3523 
3524 
3525 /*
3526  *      This routine handles the data.  If there is room in the buffer,
3527  *      it will have already been moved into it.  If there is no
3528  *      room, then we will just have to discard the packet.
3529  */
3530 
3531 extern __inline__ int tcp_data(struct sk_buff *skb, struct sock *sk, 
3532          unsigned long saddr, unsigned short len)
3533 {
3534         struct sk_buff *skb1, *skb2;
3535         struct tcphdr *th;
3536         int dup_dumped=0;
3537         unsigned long new_seq;
3538         unsigned long shut_seq;
3539 
3540         th = skb->h.th;
3541         skb->len = len -(th->doff*4);
3542 
3543         /*
3544          *      The bytes in the receive read/assembly queue have increased. Needed for the
3545          *      low memory discard algorithm 
3546          */
3547            
3548         sk->bytes_rcv += skb->len;
3549         
3550         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3551         {
3552                 /* 
3553                  *      Don't want to keep passing ACKs back and forth
3554                  *      (someone sent us a dataless, boring frame).
3555                  */
3556                 if (!th->ack)
3557                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3558                 kfree_skb(skb, FREE_READ);
3559                 return(0);
3560         }
3561         
3562         /*
3563          *      We no longer have anyone receiving data on this connection.
3564          */
3565 
3566         if(sk->shutdown & RCV_SHUTDOWN)
3567         {
3568                 /*
3569                  *      FIXME: BSD has some magic to avoid sending resets to
3570                  *      broken 4.2 BSD keepalives. Much to my surprise a few non
3571                  *      BSD stacks still have broken keepalives so we want to
3572                  *      cope with it.
3573                  */
3574                  
3575                 if(skb->len)    /* We don't care if it's just an ack or
3576                                    a keepalive/window probe */
3577                 {
3578                         new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3579                         
3580                         /* Do this the way 4.4BSD treats it. Not what I'd
3581                            regard as the meaning of the spec but it's what BSD
3582                            does and clearly they know everything 8) */
3583 
3584                         /*
3585                          *      This is valid because of two things
3586                          *
3587                          *      a) The way tcp_data behaves at the bottom.
3588                          *      b) A fin takes effect when read not when received.
3589                          */
3590                          
3591                         shut_seq=sk->acked_seq+1;       /* Last byte */
3592                         
3593                         if(after(new_seq,shut_seq))
3594                         {
3595                                 if(sk->debug)
3596                                         printk("Data arrived on %p after close [Data right edge %lX, Socket shut on %lX] %d\n",
3597                                                 sk, new_seq, shut_seq, sk->blog);
3598                                 if(sk->dead)
3599                                 {
3600                                         sk->acked_seq = new_seq + th->fin;
3601                                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3602                                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3603                                         tcp_statistics.TcpEstabResets++;
3604                                         tcp_set_state(sk,TCP_CLOSE);
3605                                         sk->err = EPIPE;
3606                                         sk->shutdown = SHUTDOWN_MASK;
3607                                         kfree_skb(skb, FREE_READ);
3608                                         return 0;
3609                                 }
3610                         }
3611                 }
3612         }
3613 
3614         /*
3615          *      Now we have to walk the chain, and figure out where this one
3616          *      goes into it.  This is set up so that the last packet we received
3617          *      will be the first one we look at, that way if everything comes
3618          *      in order, there will be no performance loss, and if they come
3619          *      out of order we will be able to fit things in nicely.
3620          *
3621          *      [AC: This is wrong. We should assume in order first and then walk
3622          *       forwards from the first hole based upon real traffic patterns.]
3623          *      
3624          */
3625 
3626         /* 
3627          *      This should start at the last one, and then go around forwards.
3628          */
3629 
3630         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3631         {
3632                 skb_queue_head(&sk->receive_queue,skb);
3633                 skb1= NULL;
3634         } 
3635         else
3636         {
3637                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3638                 {
3639                         if(sk->debug)
3640                         {
3641                                 printk("skb1=%p :", skb1);
3642                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3643                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3644                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3645                                                 sk->acked_seq);
3646                         }
3647                         
3648                         /*
3649                          *      Optimisation: Duplicate frame or extension of previous frame from
3650                          *      same sequence point (lost ack case).
3651                          *      If the frame contains duplicate data or replaces a previous frame,
3652                          *      discard the previous frame (safe as sk->inuse is set) and put
3653                          *      the new one in its place.
3654                          */
3655                          
3656                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3657                         {
3658                                 skb_append(skb1,skb);
3659                                 skb_unlink(skb1);
3660                                 kfree_skb(skb1,FREE_READ);
3661                                 dup_dumped=1;
3662                                 skb1=NULL;
3663                                 break;
3664                         }
3665                         
3666                         /*
3667                          *      Found where it fits
3668                          */
3669                          
3670                         if (after(th->seq+1, skb1->h.th->seq))
3671                         {
3672                                 skb_append(skb1,skb);
3673                                 break;
3674                         }
3675                         
3676                         /*
3677                          *      See if we've hit the start. If so insert.
3678                          */
3679                         if (skb1 == skb_peek(&sk->receive_queue))
3680                         {
3681                                 skb_queue_head(&sk->receive_queue, skb);
3682                                 break;
3683                         }
3684                 }
3685         }
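
             /*
              *     Illustrative walk of the insertion above (example sequence
              *     numbers, not from the original source): if the queue
              *     already holds segments with seq 100 and 300 and one with
              *     seq 200 arrives, the loop starts at the tail (300), finds
              *     200+1 is not after 300, steps back to 100, finds 200+1
              *     after 100 and appends there, leaving the queue ordered
              *     100, 200, 300.  A repeat of seq 300 carrying at least as
              *     much data simply replaces the old frame (the dup_dumped
              *     case).
              */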
3686 
3687         /*
3688          *      Figure out what the ack value for this frame is
3689          */
3690          
3691         th->ack_seq = th->seq + skb->len;
3692         if (th->syn) 
3693                 th->ack_seq++;
3694         if (th->fin)
3695                 th->ack_seq++;
3696 
3697         if (before(sk->acked_seq, sk->copied_seq)) 
3698         {
3699                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3700                 sk->acked_seq = sk->copied_seq;
3701         }
3702 
3703         /*
3704          *      Now figure out if we can ack anything. This is very messy because we really want two
3705          *      receive queues, a completed and an assembly queue. We also want only one transmit
3706          *      queue.
3707          */
3708 
3709         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3710         {
3711                 if (before(th->seq, sk->acked_seq+1)) 
3712                 {
3713                         int newwindow;
3714 
3715                         if (after(th->ack_seq, sk->acked_seq)) 
3716                         {
3717                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3718                                 if (newwindow < 0)
3719                                         newwindow = 0;  
3720                                 sk->window = newwindow;
3721                                 sk->acked_seq = th->ack_seq;
3722                         }
3723                         skb->acked = 1;
3724 
3725                         /*
3726                          *      When we ack the fin, we do the FIN 
3727                          *      processing.
3728                          */
3729 
3730                         if (skb->h.th->fin) 
3731                         {
3732                                 tcp_fin(skb,sk,skb->h.th);
3733                         }
3734           
3735                         for(skb2 = skb->next;
3736                             skb2 != (struct sk_buff *)&sk->receive_queue;
3737                             skb2 = skb2->next) 
3738                         {
3739                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3740                                 {
3741                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3742                                         {
3743                                                 newwindow = sk->window -
3744                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3745                                                 if (newwindow < 0)
3746                                                         newwindow = 0;  
3747                                                 sk->window = newwindow;
3748                                                 sk->acked_seq = skb2->h.th->ack_seq;
3749                                         }
3750                                         skb2->acked = 1;
3751                                         /*
3752                                          *      When we ack the fin, we do
3753                                          *      the fin handling.
3754                                          */
3755                                         if (skb2->h.th->fin) 
3756                                         {
3757                                                 tcp_fin(skb2,sk,skb2->h.th);
3758                                         }
3759 
3760                                         /*
3761                                          *      Force an immediate ack.
3762                                          */
3763                                          
3764                                         sk->ack_backlog = sk->max_ack_backlog;
3765                                 }
3766                                 else
3767                                 {
3768                                         break;
3769                                 }
3770                         }
3771 
3772                         /*
3773                          *      This also takes care of updating the window.
3774                          *      This if statement needs to be simplified.
3775                          */
3776                         if (!sk->delay_acks ||
3777                             sk->ack_backlog >= sk->max_ack_backlog || 
3778                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3779         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3780                         }
3781                         else 
3782                         {
3783                                 sk->ack_backlog++;
3784                                 if(sk->debug)
3785                                         printk("Ack queued.\n");
3786                                 reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3787                         }
3788                 }
3789         }
3790 
3791         /*
3792          *      If we've missed a packet, send an ack.
3793          *      Also start a timer to send another.
3794          */
3795          
3796         if (!skb->acked) 
3797         {
3798         
3799         /*
3800          *      This is important.  If we don't have much room left,
3801          *      we need to throw out a few packets so we have a good
3802          *      window.  Note that mtu is used, not mss, because mss is really
3803          *      for the send side.  He could be sending us stuff as large as mtu.
3804          */
3805                  
3806                 while (sk->prot->rspace(sk) < sk->mtu) 
3807                 {
3808                         skb1 = skb_peek(&sk->receive_queue);
3809                         if (skb1 == NULL) 
3810                         {
3811                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3812                                 break;
3813                         }
3814 
3815                         /*
3816                          *      Don't throw out something that has been acked. 
3817                          */
3818                  
3819                         if (skb1->acked) 
3820                         {
3821                                 break;
3822                         }
3823                 
3824                         skb_unlink(skb1);
3825                         kfree_skb(skb1, FREE_READ);
3826                 }
3827                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3828                 sk->ack_backlog++;
3829                 reset_xmit_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3830         }
3831         else
3832         {
3833                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3834         }
3835 
3836         /*
3837          *      Now tell the user we may have some data. 
3838          */
3839          
3840         if (!sk->dead) 
3841         {
3842                 if(sk->debug)
3843                         printk("Data wakeup.\n");
3844                 sk->data_ready(sk,0);
3845         } 
3846         return(0);
3847 }
3848 
3849 
3850 /*
3851  *      This routine is only called when we have urgent data
3852  *      signalled. It's the 'slow' part of tcp_urg. It could be
3853  *      moved inline now as tcp_urg is only called from one
3854  *      place. We handle URGent data wrong. We have to - as
3855  *      BSD still doesn't use the correction from RFC961.
3856  */
3857  
3858 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
3859 {
3860         unsigned long ptr = ntohs(th->urg_ptr);
3861 
3862         if (ptr)
3863                 ptr--;
3864         ptr += th->seq;
3865 
3866         /* ignore urgent data that we've already seen and read */
3867         if (after(sk->copied_seq, ptr))
3868                 return;
3869 
3870         /* do we already have a newer (or duplicate) urgent pointer? */
3871         if (sk->urg_data && !after(ptr, sk->urg_seq))
3872                 return;
3873 
3874         /* tell the world about our new urgent pointer */
3875         if (sk->proc != 0) {
3876                 if (sk->proc > 0) {
3877                         kill_proc(sk->proc, SIGURG, 1);
3878                 } else {
3879                         kill_pg(-sk->proc, SIGURG, 1);
3880                 }
3881         }
3882         sk->urg_data = URG_NOTYET;
3883         sk->urg_seq = ptr;
3884 }
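
     /*
      *      Worked example of the pointer arithmetic above (illustrative
      *      values only): with th->seq = 1000 and th->urg_ptr = 5 we get
      *      ptr = 5 - 1 + 1000 = 1004, so the urgent byte is the fifth data
      *      octet of the segment - the BSD "points past the urgent byte"
      *      reading referred to above.  tcp_urg() below then finds it at
      *      offset urg_seq - th->seq + th->doff*4 from the start of the TCP
      *      header, i.e. 4 bytes into the data.
      */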
3885 
3886 /*
3887  *      This is the 'fast' part of urgent handling.
3888  */
3889  
3890 extern __inline__ int tcp_urg(struct sock *sk, struct tcphdr *th,
3891         unsigned long saddr, unsigned long len)
3892 {
3893         unsigned long ptr;
3894 
3895         /*
3896          *      Check if we get a new urgent pointer - normally not 
3897          */
3898          
3899         if (th->urg)
3900                 tcp_check_urg(sk,th);
3901 
3902         /*
3903          *      Do we wait for any urgent data? - normally not
3904          */
3905          
3906         if (sk->urg_data != URG_NOTYET)
3907                 return 0;
3908 
3909         /*
3910          *      Is the urgent pointer pointing into this packet? 
3911          */
3912          
3913         ptr = sk->urg_seq - th->seq + th->doff*4;
3914         if (ptr >= len)
3915                 return 0;
3916 
3917         /*
3918          *      Ok, got the correct packet, update info 
3919          */
3920          
3921         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3922         if (!sk->dead)
3923                 sk->data_ready(sk,0);
3924         return 0;
3925 }
3926 
3927 /*
3928  *      This will accept the next outstanding connection. 
3929  */
3930  
3931 static struct sock *tcp_accept(struct sock *sk, int flags)
3932 {
3933         struct sock *newsk;
3934         struct sk_buff *skb;
3935   
3936         /*
3937          *      We need to make sure that this socket is listening,
3938          *      and that it has something pending.
3939          */
3940 
3941         if (sk->state != TCP_LISTEN) 
3942         {
3943                 sk->err = EINVAL;
3944                 return(NULL); 
3945         }
3946 
3947         /* Avoid the race. */
3948         cli();
3949         sk->inuse = 1;
3950 
3951         while((skb = tcp_dequeue_established(sk)) == NULL) 
3952         {
3953                 if (flags & O_NONBLOCK) 
3954                 {
3955                         sti();
3956                         release_sock(sk);
3957                         sk->err = EAGAIN;
3958                         return(NULL);
3959                 }
3960 
3961                 release_sock(sk);
3962                 interruptible_sleep_on(sk->sleep);
3963                 if (current->signal & ~current->blocked) 
3964                 {
3965                         sti();
3966                         sk->err = ERESTARTSYS;
3967                         return(NULL);
3968                 }
3969                 sk->inuse = 1;
3970         }
3971         sti();
3972 
3973         /*
3974          *      Now all we need to do is return skb->sk. 
3975          */
3976 
3977         newsk = skb->sk;
3978 
3979         kfree_skb(skb, FREE_READ);
3980         sk->ack_backlog--;
3981         release_sock(sk);
3982         return(newsk);
3983 }
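
     /*
      *      Usage note (added for illustration, not from the original source):
      *      this is the protocol side of accept(2).  A blocking caller sleeps
      *      in the loop above until tcp_dequeue_established() yields a
      *      completed connection; with O_NONBLOCK set and nothing pending the
      *      routine returns NULL with sk->err set to EAGAIN.
      */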
3984 
3985 
3986 /*
3987  *      This will initiate an outgoing connection. 
3988  */
3989  
3990 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
3991 {
3992         struct sk_buff *buff;
3993         struct device *dev=NULL;
3994         unsigned char *ptr;
3995         int tmp;
3996         int atype;
3997         struct tcphdr *t1;
3998         struct rtable *rt;
3999 
4000         if (sk->state != TCP_CLOSE) 
4001         {
4002                 return(-EISCONN);
4003         }
4004         
4005         if (addr_len < 8) 
4006                 return(-EINVAL);
4007 
4008         if (usin->sin_family && usin->sin_family != AF_INET) 
4009                 return(-EAFNOSUPPORT);
4010 
4011         /*
4012          *      connect() to INADDR_ANY means loopback (BSD'ism).
4013          */
4014         
4015         if(usin->sin_addr.s_addr==INADDR_ANY)
4016                 usin->sin_addr.s_addr=ip_my_addr();
4017                   
4018         /*
4019          *      Don't want a TCP connection going to a broadcast address 
4020          */
4021 
4022         if ((atype=ip_chk_addr(usin->sin_addr.s_addr)) == IS_BROADCAST || atype==IS_MULTICAST) 
4023                 return -ENETUNREACH;
4024   
4025         sk->inuse = 1;
4026         sk->daddr = usin->sin_addr.s_addr;
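             /*
              *      The initial send sequence is taken from the clock (jiffies
              *      scaled by SEQ_TICK), so successive incarnations of a
              *      connection start at different points in sequence space.
              */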
4027         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
4028         sk->window_seq = sk->write_seq;
4029         sk->rcv_ack_seq = sk->write_seq -1;
4030         sk->err = 0;
4031         sk->dummy_th.dest = usin->sin_port;
4032         release_sock(sk);
4033 
4034         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
4035         if (buff == NULL) 
4036         {
4037                 return(-ENOMEM);
4038         }
4039         sk->inuse = 1;
4040         buff->len = 24;
4041         buff->sk = sk;
4042         buff->free = 0;
4043         buff->localroute = sk->localroute;
4044         
4045         t1 = (struct tcphdr *) buff->data;
4046 
4047         /*
4048          *      Put in the IP header and routing stuff. 
4049          */
4050          
4051         rt=ip_rt_route(sk->daddr, NULL, NULL);
4052         
4053 
4054         /*
4055          *      We need to build the routing stuff from the things saved in skb. 
4056          */
4057 
4058         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4059                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
4060         if (tmp < 0) 
4061         {
4062                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4063                 release_sock(sk);
4064                 return(-ENETUNREACH);
4065         }
4066 
4067         buff->len += tmp;
4068         t1 = (struct tcphdr *)((char *)t1 +tmp);
4069 
4070         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
4071         t1->seq = ntohl(sk->write_seq++);
4072         sk->sent_seq = sk->write_seq;
4073         buff->h.seq = sk->write_seq;
4074         t1->ack = 0;
4075         t1->window = 2;
4076         t1->res1=0;
4077         t1->res2=0;
4078         t1->rst = 0;
4079         t1->urg = 0;
4080         t1->psh = 0;
4081         t1->syn = 1;
4082         t1->urg_ptr = 0;
4083         t1->doff = 6;
4084         /* Use 512, or whatever MSS the user asked for */
4085         
4086         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
4087                 sk->window_clamp=rt->rt_window;
4088         else
4089                 sk->window_clamp=0;
4090 
4091         if (sk->user_mss)
4092                 sk->mtu = sk->user_mss;
4093         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
4094                 sk->mtu = rt->rt_mss;
4095         else 
4096         {
4097 #ifdef CONFIG_INET_SNARL
4098                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
4099 #else
4100                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
4101 #endif
4102                         sk->mtu = 576 - HEADER_SIZE;
4103                 else
4104                         sk->mtu = MAX_WINDOW;
4105         }
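             /*
              *      In other words: when the destination looks non-local the
              *      conservative 576 - HEADER_SIZE value is used, otherwise
              *      MAX_WINDOW.  With CONFIG_INET_SNARL ("subnets are local")
              *      the test uses the address class mask rather than the
              *      interface mask.
              */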
4106         /*
4107          *      but not bigger than device MTU 
4108          */
4109 
4110         if(sk->mtu <32)
4111                 sk->mtu = 32;   /* Sanity limit */
4112                 
4113         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
4114         
4115         /*
4116          *      Put in the TCP options to say MTU. 
4117          */
4118 
4119         ptr = (unsigned char *)(t1+1);
4120         ptr[0] = 2;
4121         ptr[1] = 4;
4122         ptr[2] = (sk->mtu) >> 8;
4123         ptr[3] = (sk->mtu) & 0xff;
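             /*
              *      This is the MSS option: kind 2, length 4, then the MSS in
              *      network byte order.  An MSS of 1460, for example, would go
              *      on the wire as the four option bytes 02 04 05 b4.
              */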
4124         tcp_send_check(t1, sk->saddr, sk->daddr,
4125                   sizeof(struct tcphdr) + 4, sk);
4126 
4127         /*
4128          *      This must go first otherwise a really quick response will get reset. 
4129          */
4130 
4131         tcp_set_state(sk,TCP_SYN_SENT);
4132         sk->rto = TCP_TIMEOUT_INIT;
4133         init_timer(&sk->retransmit_timer);
4134         sk->retransmit_timer.function=&retransmit_timer;
4135         sk->retransmit_timer.data = (unsigned long)sk;
4136         reset_xmit_timer(sk, TIME_WRITE, sk->rto);      /* Timer for repeating the SYN until an answer */
4137         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
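             /*
              *      Starting the retransmit count at TCP_RETR2 - TCP_SYN_RETRIES
              *      means the SYN should only be retried about TCP_SYN_RETRIES
              *      times before the timer's usual TCP_RETR2 give-up threshold
              *      is reached.
              */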
4138 
4139         sk->prot->queue_xmit(sk, dev, buff, 0);  
4140         reset_xmit_timer(sk, TIME_WRITE, sk->rto);
4141         tcp_statistics.TcpActiveOpens++;
4142         tcp_statistics.TcpOutSegs++;
4143   
4144         release_sock(sk);
4145         return(0);
4146 }
4147 
4148 
4149         /* This function checks to see if the tcp header is actually acceptable. */
4150 extern __inline__ int tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
4151              struct options *opt, unsigned long saddr, struct device *dev)
4152 {
4153         unsigned long next_seq;
4154 
4155         next_seq = len - 4*th->doff;
4156         if (th->fin)
4157                 next_seq++;
4158         /* if we have a zero window, we can't have any data in the packet.. */
4159         if (next_seq && !sk->window)
4160                 goto ignore_it;
4161         next_seq += th->seq;
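             /*
              *      next_seq is now one past the last sequence number the segment
              *      occupies (a FIN counts as one octet).  The two tests below
              *      accept the segment if any of it falls inside the window we
              *      are offering, roughly acked_seq .. acked_seq + window.
              */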
4162 
4163         /*
4164          * This isn't quite right.  sk->acked_seq could be more recent
4165          * than sk->window.  This is however close enough.  We will accept
4166          * slightly more packets than we should, but it should not cause
4167          * problems unless someone is trying to forge packets.
4168          */
4169 
4170         /* have we already seen all of this packet? */
4171         if (!after(next_seq+1, sk->acked_seq))
4172                 goto ignore_it;
4173         /* or does it start beyond the window? */
4174         if (!before(th->seq, sk->acked_seq + sk->window + 1))
4175                 goto ignore_it;
4176 
4177         /* ok, at least part of this packet would seem interesting.. */
4178         return 1;
4179 
4180 ignore_it:
4181         if (th->rst)
4182                 return 0;
4183 
4184         /*
4185          *      Send a reset if we get something not ours and we are
4186          *      unsynchronized. Note: We don't do anything to our end. We
4187          *      are just killing the bogus remote connection; then we will
4188          *      connect again and it will work (with luck).
4189          */
4190          
4191         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) 
4192         {
4193                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
4194                 return 1;
4195         }
4196 
4197         /* Try to resync things. */
4198         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
4199         return 0;
4200 }
4201 
4202 /*
4203  *      When we get a reset we do this.
4204  */
4205 
4206 static int tcp_std_reset(struct sock *sk, struct sk_buff *skb)
4207 {
4208         sk->zapped = 1;
4209         sk->err = ECONNRESET;
4210         if (sk->state == TCP_SYN_SENT)
4211                 sk->err = ECONNREFUSED;
4212         if (sk->state == TCP_CLOSE_WAIT)
4213                 sk->err = EPIPE;
4214 #ifdef TCP_DO_RFC1337           
4215         /*
4216          *      Time wait assassination protection [RFC1337]
4217          */
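             /*
              *      That is: a RST arriving in TIME_WAIT does not move the socket
              *      to CLOSE, so an old or forged reset cannot "assassinate" the
              *      TIME_WAIT state and let a new incarnation accept stale
              *      segments.
              */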
4218         if(sk->state!=TCP_TIME_WAIT)
4219         {       
4220                 tcp_set_state(sk,TCP_CLOSE);
4221                 sk->shutdown = SHUTDOWN_MASK;
4222         }
4223 #else   
4224         tcp_set_state(sk,TCP_CLOSE);
4225         sk->shutdown = SHUTDOWN_MASK;
4226 #endif  
4227         if (!sk->dead) 
4228                 sk->state_change(sk);
4229         kfree_skb(skb, FREE_READ);
4230         release_sock(sk);
4231         return(0);
4232 }
4233 
4234 /*
4235  *      A TCP packet has arrived.
4236  */
4237  
4238 int tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
4239         unsigned long daddr, unsigned short len,
4240         unsigned long saddr, int redo, struct inet_protocol * protocol)
4241 {
4242         struct tcphdr *th;
4243         struct sock *sk;
4244         int syn_ok=0;
4245         
4246         if (!skb) 
4247         {
4248                 printk("IMPOSSIBLE 1\n");
4249                 return(0);
4250         }
4251 
4252         if (!dev) 
4253         {
4254                 printk("IMPOSSIBLE 2\n");
4255                 return(0);
4256         }
4257   
4258         tcp_statistics.TcpInSegs++;
4259   
4260         if(skb->pkt_type!=PACKET_HOST)
4261         {
4262                 kfree_skb(skb,FREE_READ);
4263                 return(0);
4264         }
4265   
4266         th = skb->h.th;
4267 
4268         /*
4269          *      Find the socket.
4270          */
4271 
4272         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
4273 
4274         /*
4275          *      If this socket has got a reset it's to all intents and purposes
4276          *      really dead. Count closed sockets as dead.
4277          *
4278          *      Note: BSD appears to have a bug here. A 'closed' TCP in BSD
4279          *      simply drops data. This seems incorrect as a 'closed' TCP doesn't
4280          *      exist so should cause resets as if the port was unreachable.
4281          */
4282          
4283         if (sk!=NULL && (sk->zapped || sk->state==TCP_CLOSE))
4284                 sk=NULL;
4285 
4286         if (!redo) 
4287         {
4288                 if (tcp_check(th, len, saddr, daddr )) 
4289                 {
4290                         skb->sk = NULL;
4291                         kfree_skb(skb,FREE_READ);
4292                         /*
4293                          * We don't release the socket because it was
4294                          * never marked in use.
4295                          */
4296                         return(0);
4297                 }
4298                 th->seq = ntohl(th->seq);
4299 
4300                 /* See if we know about the socket. */
4301                 if (sk == NULL) 
4302                 {
4303                         /*
4304                          *      No such TCB. If th->rst is 0 send a reset (checked in tcp_reset)
4305                          */
4306                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
4307                         skb->sk = NULL;
4308                         /*
4309                          *      Discard frame
4310                          */
4311                         kfree_skb(skb, FREE_READ);
4312                         return(0);
4313                 }
4314 
4315                 skb->len = len;
4316                 skb->acked = 0;
4317                 skb->used = 0;
4318                 skb->free = 0;
4319                 skb->saddr = daddr;
4320                 skb->daddr = saddr;
4321         
4322                 /* We may need to add it to the backlog here. */
4323                 cli();
4324                 if (sk->inuse) 
4325                 {
4326                         skb_queue_tail(&sk->back_log, skb);
4327                         sti();
4328                         return(0);
4329                 }
4330                 sk->inuse = 1;
4331                 sti();
4332         }
4333         else
4334         {
4335                 if (sk==NULL) 
4336                 {
4337                         tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
4338                         skb->sk = NULL;
4339                         kfree_skb(skb, FREE_READ);
4340                         return(0);
4341                 }
4342         }
4343 
4344 
4345         if (!sk->prot) 
4346         {
4347                 printk("IMPOSSIBLE 3\n");
4348                 return(0);
4349         }
4350 
4351 
4352         /*
4353          *      Charge the memory to the socket. 
4354          */
4355          
4356         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
4357         {
4358                 kfree_skb(skb, FREE_READ);
4359                 release_sock(sk);
4360                 return(0);
4361         }
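             /*
              *      Dropping the segment when the receive buffer is full is safe:
              *      the remote end will retransmit it and by then we may have
              *      room again.
              */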
4362 
4363         skb->sk=sk;
4364         sk->rmem_alloc += skb->mem_len;
4365 
4366         /*
4367          *      This basically follows the flow suggested by RFC793, with the corrections in RFC1122. We
4368          *      don't implement precedence and we process URG incorrectly (deliberately so) for BSD bug
4369          *      compatibility. We also set up variables more thoroughly [Karn notes in the
4370          *      KA9Q code the RFC793 incoming segment rules don't initialise the variables for all paths].
4371          */
4372 
4373         if(sk->state!=TCP_ESTABLISHED)          /* Skip this lot for normal flow */
4374         {
4375         
4376                 /*
4377                  *      Now deal with unusual cases.
4378                  */
4379          
4380                 if(sk->state==TCP_LISTEN)
4381                 {
4382                         if(th->ack)     /* These use the socket TOS.. might want to be the received TOS */
4383                                 tcp_reset(daddr,saddr,th,sk->prot,opt,dev,sk->ip_tos, sk->ip_ttl);
4384 
4385                         /*
4386                          *      We don't care about RST, and non-SYN segments are absorbed (old segments).
4387                          *      Broadcast/multicast SYN isn't allowed. Note - bug: if you change the
4388                          *      netmask on a running connection it can go broadcast. Even Suns have
4389                          *      this problem, so I'm ignoring it.
4390                          */
4391                            
4392                         if(th->rst || !th->syn || th->ack || ip_chk_addr(daddr)!=IS_MYADDR)
4393                         {
4394                                 kfree_skb(skb, FREE_READ);
4395                                 release_sock(sk);
4396                                 return 0;
4397                         }
4398                 
4399                         /*      
4400                          *      Guess we need to make a new socket up 
4401                          */
4402                 
4403                         tcp_conn_request(sk, skb, daddr, saddr, opt, dev, tcp_init_seq());
4404                 
4405                         /*
4406                          *      Now we have several options: In theory there is nothing else
4407                          *      in the frame. KA9Q has an option to send data with the syn,
4408                          *      BSD accepts data with the syn up to the [to be] advertised window
4409                          *      and Solaris 2.1 gives you a protocol error. For now we just ignore
4410                          *      it, that fits the spec precisely and avoids incompatibilities. It
4411                          *      would be nice in future to drop through and process the data.
4412                          */
4413                          
4414                         release_sock(sk);
4415                         return 0;
4416                 }
4417         
4418                 /* retransmitted SYN? */
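                     /*
                      *      If its sequence plus one is exactly what we have already
                      *      acknowledged, this SYN duplicates one we answered; it
                      *      carries nothing new and is simply dropped.
                      */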
4419                 if (sk->state == TCP_SYN_RECV && th->syn && th->seq+1 == sk->acked_seq)
4420                 {
4421                         kfree_skb(skb, FREE_READ);
4422                         release_sock(sk);
4423                         return 0;
4424                 }
4425                 
4426                 /*
4427                  *      SYN sent means we have to look for a suitable ack and either reset
4428                  *      for bad matches or go to connected 
4429                  */
4430            
4431                 if(sk->state==TCP_SYN_SENT)
4432                 {
4433                         /* Crossed SYN or previous junk segment */
4434                         if(th->ack)
4435                         {
4436                                 /* We got an ack, but it's not a good ack */
4437                                 if(!tcp_ack(sk,th,saddr,len))
4438                                 {
4439                                         /* Reset the ack - it's an ack from a
4440                                            different connection  [ th->rst is checked in tcp_reset()] */
4441                                         tcp_statistics.TcpAttemptFails++;
4442                                         tcp_reset(daddr, saddr, th,
4443                                                 sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4444                                         kfree_skb(skb, FREE_READ);
4445                                         release_sock(sk);
4446                                         return(0);
4447                                 }
4448                                 if(th->rst)
4449                                         return tcp_std_reset(sk,skb);
4450                                 if(!th->syn)
4451                                 {
4452                                         /* A valid ack from a different connection
4453                                            start. Shouldn't happen but cover it */
4454                                         kfree_skb(skb, FREE_READ);
4455                                         release_sock(sk);
4456                                         return 0;
4457                                 }
4458                                 /*
4459                                  *      Ok.. its good. Set up sequence numbers and
4460                                  *      move to established.
4461                                  */
4462                                 syn_ok=1;       /* Don't reset this connection for the syn */
4463                                 sk->acked_seq=th->seq+1;
4464                                 sk->fin_seq=th->seq;
4465                                 tcp_send_ack(sk->sent_seq,sk->acked_seq,sk,th,sk->daddr);
4466                                 tcp_set_state(sk, TCP_ESTABLISHED);
4467                                 tcp_options(sk,th);
4468                                 sk->dummy_th.dest=th->source;
4469                                 sk->copied_seq = sk->acked_seq;
4470                                 if(!sk->dead)
4471                                 {
4472                                         sk->state_change(sk);
4473                                         sock_wake_async(sk->socket, 0);
4474                                 }
4475                                 if(sk->max_window==0)
4476                                 {
4477                                         sk->max_window = 32;
4478                                         sk->mss = min(sk->max_window, sk->mtu);
4479                                 }
4480                         }
4481                         else
4482                         {
4483                                 /* See if SYNs cross. Drop if boring */
4484                                 if(th->syn && !th->rst)
4485                                 {
4486                                         /* Crossed SYNs are fine - but talking to
4487                                            yourself is right out... */
4488                                         if(sk->saddr==saddr && sk->daddr==daddr &&
4489                                                 sk->dummy_th.source==th->source &&
4490                                                 sk->dummy_th.dest==th->dest)
4491                                         {
4492                                                 tcp_statistics.TcpAttemptFails++;
4493                                                 return tcp_std_reset(sk,skb);
4494                                         }
4495                                         tcp_set_state(sk,TCP_SYN_RECV);
4496                                         
4497                                         /*
4498                                          *      FIXME:
4499                                          *      Must send SYN|ACK here
4500                                          */
4501                                 }               
4502                                 /* Discard junk segment */
4503                                 kfree_skb(skb, FREE_READ);
4504                                 release_sock(sk);
4505                                 return 0;
4506                         }
4507                         /*
4508                          *      SYN_RECV with data maybe.. drop through
4509                          */
4510                         goto rfc_step6;
4511                 }
4512 
4513         /* BSD has a funny hack with TIME_WAIT and fast reuse of a port. There is
4514            a more complex suggestion for fixing these reuse issues in RFC1644,
4515            but it is not yet ready for general use. Also see RFC1379. */
4516         
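             /*
              *      The hack below: a new SYN arriving for a dead TIME_WAIT socket,
              *      with a sequence number beyond anything we acknowledged, kills
              *      the old socket and, if a matching listener exists, is handed to
              *      it as a fresh connection request.  The new initial sequence is
              *      placed well past the old write_seq (seq + 128000) so the two
              *      incarnations should not overlap in sequence space.
              */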
4517 #define BSD_TIME_WAIT
4518 #ifdef BSD_TIME_WAIT
4519                 if (sk->state == TCP_TIME_WAIT && th->syn && sk->dead && 
4520                         after(th->seq, sk->acked_seq) && !th->rst)
4521                 {
4522                         long seq=sk->write_seq;
4523                         if(sk->debug)
4524                                 printk("Doing a BSD time wait\n");
4525                         tcp_statistics.TcpEstabResets++;           
4526                         sk->rmem_alloc -= skb->mem_len;
4527                         skb->sk = NULL;
4528                         sk->err=ECONNRESET;
4529                         tcp_set_state(sk, TCP_CLOSE);
4530                         sk->shutdown = SHUTDOWN_MASK;
4531                         release_sock(sk);
4532                         sk=get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
4533                         if (sk && sk->state==TCP_LISTEN)
4534                         {
4535                                 sk->inuse=1;
4536                                 skb->sk = sk;
4537                                 sk->rmem_alloc += skb->mem_len;
4538                                 tcp_conn_request(sk, skb, daddr, saddr,opt, dev,seq+128000);
4539                                 release_sock(sk);
4540                                 return 0;
4541                         }
4542                         kfree_skb(skb, FREE_READ);
4543                         return 0;
4544                 }
4545 #endif  
4546         }
4547 
4548         /* We are now in normal data flow (see the step list in the RFC) */
4549         /* Note most of these are inline now. I'll inline the lot when
4550            I have time to test it hard and look at what gcc outputs */
4551         
4552         if(!tcp_sequence(sk,th,len,opt,saddr,dev))
4553         {
4554                 kfree_skb(skb, FREE_READ);
4555                 release_sock(sk);
4556                 return 0;
4557         }
4558 
4559         if(th->rst)
4560                 return tcp_std_reset(sk,skb);
4561         
4562         /*
4563          *      !syn_ok is effectively the state test in RFC793.
4564          */
4565          
4566         if(th->syn && !syn_ok)
4567         {
4568                 tcp_reset(daddr,saddr,th, &tcp_prot, opt, dev, skb->ip_hdr->tos, 255);
4569                 return tcp_std_reset(sk,skb);   
4570         }
4571 
4572         /*
4573          *      Process the ACK
4574          */
4575          
4576 
4577         if(th->ack && !tcp_ack(sk,th,saddr,len))
4578         {
4579                 /*
4580                  *      Our three way handshake failed.
4581                  */
4582                  
4583                 if(sk->state==TCP_SYN_RECV)
4584                 {
4585                         tcp_reset(daddr, saddr, th,sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4586                 }
4587                 kfree_skb(skb, FREE_READ);
4588                 release_sock(sk);
4589                 return 0;
4590         }
4591         
4592 rfc_step6:              /* I'll clean this up later */
4593 
4594         /*
4595          *      Process urgent data
4596          */
4597                 
4598         if(tcp_urg(sk, th, saddr, len))
4599         {
4600                 kfree_skb(skb, FREE_READ);
4601                 release_sock(sk);
4602                 return 0;
4603         }
4604         
4605         
4606         /*
4607          *      Process the encapsulated data
4608          */
4609         
4610         if(tcp_data(skb,sk, saddr, len))
4611         {
4612                 kfree_skb(skb, FREE_READ);
4613                 release_sock(sk);
4614                 return 0;
4615         }
4616 
4617         /*
4618          *      And done
4619          */     
4620         
4621         release_sock(sk);
4622         return 0;
4623 }
4624 
4625 /*
4626  *      This routine sends a packet with an out of date sequence
4627  *      number. It assumes the other end will try to ack it.
4628  */
4629 
4630 static void tcp_write_wakeup(struct sock *sk)
4631 {
4632         struct sk_buff *buff;
4633         struct tcphdr *t1;
4634         struct device *dev=NULL;
4635         int tmp;
4636 
4637         if (sk->zapped)
4638                 return; /* After a valid reset we can send no more */
4639 
4640         /*
4641          *      Write data can still be transmitted/retransmitted in the
4642          *      following states.  If any other state is encountered, return.
4643          */
4644 
4645         if (sk->state != TCP_ESTABLISHED && 
4646             sk->state != TCP_CLOSE_WAIT &&
4647             sk->state != TCP_FIN_WAIT1 && 
4648             sk->state != TCP_LAST_ACK &&
4649             sk->state != TCP_CLOSING
4650         ) 
4651         {
4652                 return;
4653         }
4654 
4655         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4656         if (buff == NULL) 
4657                 return;
4658 
4659         buff->len = sizeof(struct tcphdr);
4660         buff->free = 1;
4661         buff->sk = sk;
4662         buff->localroute = sk->localroute;
4663 
4664         t1 = (struct tcphdr *) buff->data;
4665 
4666         /* Put in the IP header and routing stuff. */
4667         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4668                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4669         if (tmp < 0) 
4670         {
4671                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4672                 return;
4673         }
4674 
4675         buff->len += tmp;
4676         t1 = (struct tcphdr *)((char *)t1 +tmp);
4677 
4678         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4679 
4680         /*
4681          * Use a previous sequence.
4682          * This should cause the other end to send an ack.
4683          */
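             /*
              *      The segment carries no data and an already-used sequence
              *      number, so the receiver drops it but replies with an ACK
              *      advertising its current window - exactly what a window
              *      probe needs.
              */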
4684         t1->seq = htonl(sk->sent_seq-1);
4685         t1->ack = 1; 
4686         t1->res1= 0;
4687         t1->res2= 0;
4688         t1->rst = 0;
4689         t1->urg = 0;
4690         t1->psh = 0;
4691         t1->fin = 0;    /* We are sending a 'previous' sequence, and 0 bytes of data - thus no FIN bit */
4692         t1->syn = 0;
4693         t1->ack_seq = ntohl(sk->acked_seq);
4694         t1->window = ntohs(tcp_select_window(sk));
4695         t1->doff = sizeof(*t1)/4;
4696         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4697 
4698          /*     Send it and free it.
4699           *     This will prevent the timer from automatically being restarted.
4700           */
4701         sk->prot->queue_xmit(sk, dev, buff, 1);
4702         tcp_statistics.TcpOutSegs++;
4703 }
4704 
4705 /*
4706  *      A window probe timeout has occurred.
4707  */
4708 
4709 void tcp_send_probe0(struct sock *sk)
4710 {
4711         if (sk->zapped)
4712                 return;         /* After a valid reset we can send no more */
4713 
4714         tcp_write_wakeup(sk);
4715 
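             /*
              *      Exponential backoff: every probe doubles the interval,
              *      capped at two minutes (120*HZ jiffies).
              */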
4716         sk->backoff++;
4717         sk->rto = min(sk->rto << 1, 120*HZ);
4718         reset_xmit_timer (sk, TIME_PROBE0, sk->rto);
4719         sk->retransmits++;
4720         sk->prot->retransmits ++;
4721 }
4722 
4723 /*
4724  *      Socket option code for TCP. 
4725  */
4726   
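     /*
      *      From user space these map onto the ordinary setsockopt() call, for
      *      example (a sketch only, error handling omitted):
      *
      *              int one = 1;
      *              setsockopt(fd, SOL_TCP, TCP_NODELAY, &one, sizeof(one));
      *
      *      TCP_MAXSEG is read back the same way via getsockopt().
      */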
4727 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
4728 {
4729         int val,err;
4730 
4731         if(level!=SOL_TCP)
4732                 return ip_setsockopt(sk,level,optname,optval,optlen);
4733 
4734         if (optval == NULL) 
4735                 return(-EINVAL);
4736 
4737         err=verify_area(VERIFY_READ, optval, sizeof(int));
4738         if(err)
4739                 return err;
4740         
4741         val = get_fs_long((unsigned long *)optval);
4742 
4743         switch(optname)
4744         {
4745                 case TCP_MAXSEG:
4746 /*
4747  * Values greater than the interface MTU won't take effect.  However, at
4748  * the point when this call is made we typically don't yet know
4749  * which interface is going to be used.
4750  */
4751                         if(val<1||val>MAX_WINDOW)
4752                                 return -EINVAL;
4753                         sk->user_mss=val;
4754                         return 0;
4755                 case TCP_NODELAY:
4756                         sk->nonagle=(val==0)?0:1;
4757                         return 0;
4758                 default:
4759                         return(-ENOPROTOOPT);
4760         }
4761 }
4762 
4763 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
4764 {
4765         int val,err;
4766 
4767         if(level!=SOL_TCP)
4768                 return ip_getsockopt(sk,level,optname,optval,optlen);
4769                         
4770         switch(optname)
4771         {
4772                 case TCP_MAXSEG:
4773                         val=sk->user_mss;
4774                         break;
4775                 case TCP_NODELAY:
4776                         val=sk->nonagle;
4777                         break;
4778                 default:
4779                         return(-ENOPROTOOPT);
4780         }
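             /*
              *      verify_area() checks that the user buffer is writable and
              *      put_fs_long() stores a long into user space; the option
              *      length is written first, then the value itself.
              */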
4781         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4782         if(err)
4783                 return err;
4784         put_fs_long(sizeof(int),(unsigned long *) optlen);
4785 
4786         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4787         if(err)
4788                 return err;
4789         put_fs_long(val,(unsigned long *)optval);
4790 
4791         return(0);
4792 }       
4793 
4794 
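     /*
      *      The protocol hook table for TCP.  The initialisers are positional
      *      and must follow the field order of struct proto: the buffer
      *      allocation and accounting routines first, then the operation entry
      *      points used by the generic INET socket layer, and finally the
      *      maximum header size, the retransmit count, the per-port socket
      *      array and the protocol name.
      */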
4795 struct proto tcp_prot = {
4796         sock_wmalloc,
4797         sock_rmalloc,
4798         sock_wfree,
4799         sock_rfree,
4800         sock_rspace,
4801         sock_wspace,
4802         tcp_close,
4803         tcp_read,
4804         tcp_write,
4805         tcp_sendto,
4806         tcp_recvfrom,
4807         ip_build_header,
4808         tcp_connect,
4809         tcp_accept,
4810         ip_queue_xmit,
4811         tcp_retransmit,
4812         tcp_write_wakeup,
4813         tcp_read_wakeup,
4814         tcp_rcv,
4815         tcp_select,
4816         tcp_ioctl,
4817         NULL,
4818         tcp_shutdown,
4819         tcp_setsockopt,
4820         tcp_getsockopt,
4821         128,
4822         0,
4823         {NULL,},
4824         "TCP"
4825 };
