root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. min
  2. tcp_set_state
  3. tcp_select_window
  4. tcp_find_established
  5. tcp_dequeue_established
  6. tcp_time_wait
  7. tcp_retransmit
  8. tcp_err
  9. tcp_readable
  10. tcp_select
  11. tcp_ioctl
  12. tcp_check
  13. tcp_send_check
  14. tcp_send_skb
  15. tcp_dequeue_partial
  16. tcp_send_partial
  17. tcp_enqueue_partial
  18. tcp_send_ack
  19. tcp_build_header
  20. tcp_write
  21. tcp_sendto
  22. tcp_read_wakeup
  23. cleanup_rbuf
  24. tcp_read_urg
  25. tcp_read
  26. tcp_shutdown
  27. tcp_recvfrom
  28. tcp_reset
  29. tcp_options
  30. default_mask
  31. tcp_conn_request
  32. tcp_close
  33. tcp_write_xmit
  34. sort_send
  35. tcp_ack
  36. tcp_data
  37. tcp_check_urg
  38. tcp_urg
  39. tcp_fin
  40. tcp_accept
  41. tcp_connect
  42. tcp_sequence
  43. tcp_clean_end
  44. tcp_rcv
  45. tcp_write_wakeup
  46. tcp_send_probe0
  47. tcp_setsockopt
  48. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *              Arnt Gulbrandsen, <agulbra@no.unit.nvg>
  20  *
  21  * Fixes:       
  22  *              Alan Cox        :       Numerous verify_area() calls
  23  *              Alan Cox        :       Set the ACK bit on a reset
  24  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  25  *                                      and was trying to connect (tcp_err()).
  26  *              Alan Cox        :       All icmp error handling was broken
   27  *                                      pointers passed were wrong and the
  28  *                                      socket was looked up backwards. Nobody
  29  *                                      tested any icmp error code obviously.
  30  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  31  *                                      on errors. select behaves and the icmp error race
  32  *                                      has gone by moving it into sock.c
  33  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  34  *                                      packets for unknown sockets.
  35  *              Alan Cox        :       tcp option processing.
  36  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  37  *              Herp Rosmanith  :       More reset fixes
  38  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  39  *                                      any kind of RST is right out.
  40  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  41  *                                      otherwise odd bits of prattle escape still
  42  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  43  *                                      LAN workplace lockups.
  44  *              Alan Cox        :       Some tidyups using the new skb list facilities
  45  *              Alan Cox        :       sk->keepopen now seems to work
  46  *              Alan Cox        :       Pulls options out correctly on accepts
  47  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  48  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  49  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
   50  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  51  *              Alan Cox        :       Removed incorrect check for 20 * psh
  52  *      Michael O'Reilly        :       ack < copied bug fix.
  53  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  54  *              Alan Cox        :       FIN with no memory -> CRASH
  55  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  56  *              Alan Cox        :       Added TCP options (SOL_TCP)
  57  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  58  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  59  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  60  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  61  *              Alan Cox        :       Put in missing check for SYN bit.
  62  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  63  *                                      window non shrink trick.
  64  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  65  *              Charles Hedrick :       TCP fixes
  66  *              Toomas Tamm     :       TCP window fixes
  67  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  68  *              Charles Hedrick :       Rewrote most of it to actually work
  69  *              Linus           :       Rewrote tcp_read() and URG handling
  70  *                                      completely
  71  *              Gerhard Koerting:       Fixed some missing timer handling
  72  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  73  *              Gerhard Koerting:       PC/TCP workarounds
  74  *              Adam Caldwell   :       Assorted timer/timing errors
  75  *              Matthew Dillon  :       Fixed another RST bug
  76  *              Alan Cox        :       Move to kernel side addressing changes.
  77  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  78  *              Arnt Gulbrandsen:       Turbocharged tcp_check() routine.
  79  *              Alan Cox        :       TCP fast path debugging
  80  *              Alan Cox        :       Window clamping
  81  *              Michael Riepe   :       Bug in tcp_check()
  82  *              Matt Dillon     :       More TCP improvements and RST bug fixes
   83  *              Matt Dillon     :       Yet more small nasties removed from the TCP code
  84  *                                      (Be very nice to this man if tcp finally works 100%) 8)
  85  *              Alan Cox        :       BSD accept semantics. 
  86  *
  87  *
  88  * To Fix:
  89  *                      Fast path the code. Two things here - fix the window calculation
  90  *              so it doesn't iterate over the queue, also spot packets with no funny
  91  *              options arriving in order and process directly.
  92  *
  93  *              This program is free software; you can redistribute it and/or
  94  *              modify it under the terms of the GNU General Public License
  95  *              as published by the Free Software Foundation; either version
  96  *              2 of the License, or(at your option) any later version.
  97  *
  98  * Description of States:
  99  *
 100  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 101  *
 102  *      TCP_SYN_RECV            received a connection request, sent ack,
 103  *                              waiting for final ack in three-way handshake.
 104  *
 105  *      TCP_ESTABLISHED         connection established
 106  *
 107  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 108  *                              transmission of remaining buffered data
 109  *
 110  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 111  *                              to shutdown
 112  *
 113  *      TCP_CLOSING             both sides have shutdown but we still have
 114  *                              data we have to finish sending
 115  *
 116  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 117  *                              closed, can only be entered from FIN_WAIT2
 118  *                              or CLOSING.  Required because the other end
 119  *                              may not have gotten our last ACK causing it
 120  *                              to retransmit the data packet (which we ignore)
 121  *
 122  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 123  *                              us to finish writing our data and to shutdown
 124  *                              (we have to close() to move on to LAST_ACK)
 125  *
  126  *      TCP_LAST_ACK            our side has shutdown after remote has
 127  *                              shutdown.  There may still be data in our
 128  *                              buffer that we have to finish sending
 129  *              
 130  *      TCP_CLOSE               socket is finished
 131  */
 132 #include <linux/types.h>
 133 #include <linux/sched.h>
 134 #include <linux/mm.h>
 135 #include <linux/string.h>
 136 #include <linux/socket.h>
 137 #include <linux/sockios.h>
 138 #include <linux/termios.h>
 139 #include <linux/in.h>
 140 #include <linux/fcntl.h>
 141 #include <linux/inet.h>
 142 #include <linux/netdevice.h>
 143 #include "snmp.h"
 144 #include "ip.h"
 145 #include "protocol.h"
 146 #include "icmp.h"
 147 #include "tcp.h"
 148 #include <linux/skbuff.h>
 149 #include "sock.h"
 150 #include "route.h"
 151 #include <linux/errno.h>
 152 #include <linux/timer.h>
 153 #include <asm/system.h>
 154 #include <asm/segment.h>
 155 #include <linux/mm.h>
 156 
  157 #undef TCP_FASTPATH     /* leaves the #ifdef TCP_FASTPATH section below compiled out */
  158 
  159 #define SEQ_TICK 3
  160 unsigned long seq_offset;
  161 struct tcp_mib  tcp_statistics; /* TCP counters; this file updates TcpCurrEstab, TcpAttemptFails and TcpOutSegs */
  162 
  163 #ifdef TCP_FASTPATH
      /* Only built when TCP_FASTPATH is defined; presumably fast-path receive hit/miss counters - TODO confirm. */
  164 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
  165 #endif
 166 
 167 
 168 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170         if (a < b) 
 171                 return(a);
 172         return(b);
 173 }
 174 
 175 #undef STATE_TRACE
 176 
 177 static __inline__ void tcp_set_state(struct sock *sk, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 178 {
 179         if(sk->state==TCP_ESTABLISHED)
 180                 tcp_statistics.TcpCurrEstab--;
 181 #ifdef STATE_TRACE
 182         if(sk->debug)
 183                 printk("TCP sk=%s, State %d -> %d\n",sk, sk->state,state);
 184 #endif  
 185         sk->state=state;
 186         if(state==TCP_ESTABLISHED)
 187                 tcp_statistics.TcpCurrEstab++;
 188 }
 189 
 190 /* This routine picks a TCP windows for a socket based on
 191    the following constraints
 192    
 193    1. The window can never be shrunk once it is offered (RFC 793)
 194    2. We limit memory per socket
 195    
 196    For now we use NET2E3's heuristic of offering half the memory
 197    we have handy. All is not as bad as this seems however because
 198    of two things. Firstly we will bin packets even within the window
 199    in order to get the data we are waiting for into the memory limit.
 200    Secondly we bin common duplicate forms at receive time
 201    
 202    Better heuristics welcome
 203 */
 204    
 205 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 206 {
 207         int new_window = sk->prot->rspace(sk);
 208         
 209         if(sk->window_clamp)
 210                 new_window=min(sk->window_clamp,new_window);
 211 /*
 212  * two things are going on here.  First, we don't ever offer a
 213  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 214  * receiver side of SWS as specified in RFC1122.
 215  * Second, we always give them at least the window they
 216  * had before, in order to avoid retracting window.  This
 217  * is technically allowed, but RFC1122 advises against it and
 218  * in practice it causes trouble.
 219  */
 220         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 221                 return(sk->window);
 222         return(new_window);
 223 }
 224 
 225 /*
 226  *      Find someone to 'accept'. Must be called with
 227  *      sk->inuse=1 or cli()
 228  */ 
 229 
 230 static struct sk_buff *tcp_find_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 231 {
 232         struct sk_buff *p=skb_peek(&s->receive_queue);
 233         if(p==NULL)
 234                 return NULL;
 235         do
 236         {
 237                 if(p->sk->state>=TCP_ESTABLISHED)
 238                         return p;
 239                 p=p->next;
 240         }
 241         while(p!=skb_peek(&s->receive_queue));
 242         return NULL;
 243 }
 244 
 245 static struct sk_buff *tcp_dequeue_established(struct sock *s)
     /* [previous][next][first][last][top][bottom][index][help] */
 246 {
 247         struct sk_buff *skb;
 248         unsigned long flags;
 249         save_flags(flags);
 250         cli(); 
 251         skb=tcp_find_established(s);
 252         if(skb!=NULL)
 253                 skb_unlink(skb);        /* Take it off the queue */
 254         restore_flags(flags);
 255         return skb;
 256 }
 257 
 258 
 259 /*
 260  *      Enter the time wait state. 
 261  */
 262 
 263 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 264 {
 265         tcp_set_state(sk,TCP_TIME_WAIT);
 266         sk->shutdown = SHUTDOWN_MASK;
 267         if (!sk->dead)
 268                 sk->state_change(sk);
 269         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 270 }
 271 
 272 /*
 273  *      A timer event has trigger a tcp retransmit timeout. The
 274  *      socket xmit queue is ready and set up to send. Because
 275  *      the ack receive code keeps the queue straight we do
 276  *      nothing clever here.
 277  */
 278 
 279 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 280 {
 281         if (all) 
 282         {
 283                 ip_retransmit(sk, all);
 284                 return;
 285         }
 286 
 287         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 288         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 289         sk->cong_count = 0;
 290 
 291         sk->cong_window = 1;
 292 
 293         /* Do the actual retransmit. */
 294         ip_retransmit(sk, all);
 295 }
 296 
 297 
 298 /*
 299  * This routine is called by the ICMP module when it gets some
 300  * sort of error condition.  If err < 0 then the socket should
 301  * be closed and the error returned to the user.  If err > 0
 302  * it's just the icmp type << 8 | icmp code.  After adjustment
 303  * header points to the first 8 bytes of the tcp header.  We need
 304  * to find the appropriate port.
 305  */
 306 
 307 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 308         unsigned long saddr, struct inet_protocol *protocol)
 309 {
 310         struct tcphdr *th;
 311         struct sock *sk;
 312         struct iphdr *iph=(struct iphdr *)header;
 313   
 314         header+=4*iph->ihl;
 315    
 316 
 317         th =(struct tcphdr *)header;
 318         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 319 
 320         if (sk == NULL) 
 321                 return;
 322   
 323         if(err<0)
 324         {
 325                 sk->err = -err;
 326                 sk->error_report(sk);
 327                 return;
 328         }
 329 
 330         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 331         {
 332                 /*
 333                  * FIXME:
 334                  * For now we will just trigger a linear backoff.
 335                  * The slow start code should cause a real backoff here.
 336                  */
 337                 if (sk->cong_window > 4)
 338                         sk->cong_window--;
 339                 return;
 340         }
 341 
 342 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 343 
 344         /*
 345          * If we've already connected we will keep trying
 346          * until we time out, or the user gives up.
 347          */
 348 
 349         if (icmp_err_convert[err & 0xff].fatal || sk->state == TCP_SYN_SENT) 
 350         {
 351                 if (sk->state == TCP_SYN_SENT) 
 352                 {
 353                         tcp_statistics.TcpAttemptFails++;
 354                         tcp_set_state(sk,TCP_CLOSE);
 355                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 356                 }
 357                 sk->err = icmp_err_convert[err & 0xff].errno;           
 358         }
 359         return;
 360 }
 361 
 362 
 363 /*
 364  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 365  *      in the received data queue (ie a frame missing that needs sending to us)
 366  */
 367 
 368 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 369 {
 370         unsigned long counted;
 371         unsigned long amount;
 372         struct sk_buff *skb;
 373         int sum;
 374         unsigned long flags;
 375 
 376         if(sk && sk->debug)
 377                 printk("tcp_readable: %p - ",sk);
 378 
 379         save_flags(flags);
 380         cli();
 381         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 382         {
 383                 restore_flags(flags);
 384                 if(sk && sk->debug) 
 385                         printk("empty\n");
 386                 return(0);
 387         }
 388   
 389         counted = sk->copied_seq+1;     /* Where we are at the moment */
 390         amount = 0;
 391   
 392         /* Do until a push or until we are out of data. */
 393         do 
 394         {
 395                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 396                         break;
 397                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 398                 if (skb->h.th->syn)
 399                         sum++;
 400                 if (sum > 0) 
 401                 {                                       /* Add it up, move on */
 402                         amount += sum;
 403                         if (skb->h.th->syn) 
 404                                 amount--;
 405                         counted += sum;
 406                 }
 407                 if (amount && skb->h.th->psh) break;
 408                 skb = skb->next;
 409         }
 410         while(skb != (struct sk_buff *)&sk->receive_queue);
 411 
 412         if (amount && !sk->urginline && sk->urg_data &&
 413             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 414                 amount--;               /* don't count urg data */
 415         restore_flags(flags);
 416         if(sk->debug)
 417                 printk("got %lu bytes.\n",amount);
 418         return(amount);
 419 }
 420 
 421 
 422 /*
 423  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 424  *      listening socket has a receive queue of sockets to accept.
 425  */
 426 
 427 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 428 {
 429         sk->inuse = 1;
 430 
 431         switch(sel_type) 
 432         {
 433                 case SEL_IN:
 434                         if(sk->debug)
 435                                 printk("select in");
 436                         select_wait(sk->sleep, wait);
 437                         if(sk->debug)
 438                                 printk("-select out");
 439                         if (skb_peek(&sk->receive_queue) != NULL) 
 440                         {
 441                                 if ((sk->state == TCP_LISTEN && tcp_find_established(sk)) || tcp_readable(sk)) 
 442                                 {
 443                                         release_sock(sk);
 444                                         if(sk->debug)
 445                                                 printk("-select ok data\n");
 446                                         return(1);
 447                                 }
 448                         }
 449                         if (sk->err != 0)       /* Receiver error */
 450                         {
 451                                 release_sock(sk);
 452                                 if(sk->debug)
 453                                         printk("-select ok error");
 454                                 return(1);
 455                         }
 456                         if (sk->shutdown & RCV_SHUTDOWN) 
 457                         {
 458                                 release_sock(sk);
 459                                 if(sk->debug)
 460                                         printk("-select ok down\n");
 461                                 return(1);
 462                         } 
 463                         else 
 464                         {
 465                                 release_sock(sk);
 466                                 if(sk->debug)
 467                                         printk("-select fail\n");
 468                                 return(0);
 469                         }
 470                 case SEL_OUT:
 471                         select_wait(sk->sleep, wait);
 472                         if (sk->shutdown & SEND_SHUTDOWN) 
 473                         {
 474                                 /* FIXME: should this return an error? */
 475                                 release_sock(sk);
 476                                 return(0);
 477                         }
 478 
 479                         /*
 480                          * This is now right thanks to a small fix
 481                          * by Matt Dillon.
 482                          */
 483                         
 484                         if (sk->prot->wspace(sk) >= sk->mtu+128+sk->prot->max_header) 
 485                         {
 486                                 release_sock(sk);
 487                                 /* This should cause connect to work ok. */
 488                                 if (sk->state == TCP_SYN_RECV ||
 489                                     sk->state == TCP_SYN_SENT) return(0);
 490                                 return(1);
 491                         }
 492                         release_sock(sk);
 493                         return(0);
 494                 case SEL_EX:
 495                         select_wait(sk->sleep,wait);
 496                         if (sk->err || sk->urg_data) 
 497                         {
 498                                 release_sock(sk);
 499                                 return(1);
 500                         }
 501                         release_sock(sk);
 502                         return(0);
 503         }
 504 
 505         release_sock(sk);
 506         return(0);
 507 }
 508 
 509 
 510 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 511 {
 512         int err;
 513         switch(cmd) 
 514         {
 515 
 516                 case TIOCINQ:
 517 #ifdef FIXME    /* FIXME: */
 518                 case FIONREAD:
 519 #endif
 520                 {
 521                         unsigned long amount;
 522 
 523                         if (sk->state == TCP_LISTEN) 
 524                                 return(-EINVAL);
 525 
 526                         sk->inuse = 1;
 527                         amount = tcp_readable(sk);
 528                         release_sock(sk);
 529                         err=verify_area(VERIFY_WRITE,(void *)arg,
 530                                                    sizeof(unsigned long));
 531                         if(err)
 532                                 return err;
 533                         put_fs_long(amount,(unsigned long *)arg);
 534                         return(0);
 535                 }
 536                 case SIOCATMARK:
 537                 {
 538                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 539 
 540                         err = verify_area(VERIFY_WRITE,(void *) arg,
 541                                                   sizeof(unsigned long));
 542                         if (err)
 543                                 return err;
 544                         put_fs_long(answ,(int *) arg);
 545                         return(0);
 546                 }
 547                 case TIOCOUTQ:
 548                 {
 549                         unsigned long amount;
 550 
 551                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 552                         amount = sk->prot->wspace(sk);
 553                         err=verify_area(VERIFY_WRITE,(void *)arg,
 554                                                    sizeof(unsigned long));
 555                         if(err)
 556                                 return err;
 557                         put_fs_long(amount,(unsigned long *)arg);
 558                         return(0);
 559                 }
 560                 default:
 561                         return(-EINVAL);
 562         }
 563 }
 564 
 565 
  566 /*
  567  *      This routine computes a TCP checksum. 
       *
       *      th    - start of the bytes to sum (the TCP header)
       *      len   - number of bytes to sum, starting at th
       *      saddr - source address; 0 is replaced by ip_my_addr()
       *      daddr - destination address
       *
       *      The sum starts from daddr + saddr +
       *      ((ntohs(len) << 16) + IPPROTO_TCP*256).  The len bytes at th
       *      are then added with the carry fed back in: 32 bytes per loop
       *      pass, then the remaining 4-byte words, then one 2-byte word
       *      and one byte if len has those bits set.  The upper 16 bits are
       *      folded into the lower 16 and the complement of the low 16
       *      bits is returned.
       *
       *      The inline assembly names i386 registers (eax, ebx, ecx, edx,
       *      esi) directly.
  568  */
  569  
  570 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
  571           unsigned long saddr, unsigned long daddr)
  572 {     
  573         unsigned long sum;
  574    
  575         if (saddr == 0) saddr = ip_my_addr();
  576 
  577 /*
  578  * stupid, gcc complains when I use just one __asm__ block,
  579  * something about too many reloads, but this is just two
  580  * instructions longer than what I want
  581  */
      /*
       * First block: ebx starts as daddr; saddr (ecx) and the length/protocol
       * word (edx) are added, and the final carry is added back in.
       */
  582         __asm__("
  583             addl %%ecx, %%ebx
  584             adcl %%edx, %%ebx
  585             adcl $0, %%ebx
  586             "
  587         : "=b"(sum)
  588         : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
  589         : "bx", "cx", "dx" );
      /*
       * Second block: ebx carries the running sum, ecx = len, esi = th.
       * Label 1 sums eight 32-bit words per pass, label 3 the remaining
       * 32-bit words ((len & 28) / 4 of them), label 4 an optional 16-bit
       * word, label 5 an optional trailing byte, and label 6 folds the
       * upper half of ebx into bx.
       */
  590         __asm__("
  591             movl %%ecx, %%edx
  592             cld
  593             cmpl $32, %%ecx
  594             jb 2f
  595             shrl $5, %%ecx
  596             clc
  597 1:          lodsl
  598             adcl %%eax, %%ebx
  599             lodsl
  600             adcl %%eax, %%ebx
  601             lodsl
  602             adcl %%eax, %%ebx
  603             lodsl
  604             adcl %%eax, %%ebx
  605             lodsl
  606             adcl %%eax, %%ebx
  607             lodsl
  608             adcl %%eax, %%ebx
  609             lodsl
  610             adcl %%eax, %%ebx
  611             lodsl
  612             adcl %%eax, %%ebx
  613             loop 1b
  614             adcl $0, %%ebx
  615             movl %%edx, %%ecx
  616 2:          andl $28, %%ecx
  617             je 4f
  618             shrl $2, %%ecx
  619             clc
  620 3:          lodsl
  621             adcl %%eax, %%ebx
  622             loop 3b
  623             adcl $0, %%ebx
  624 4:          movl $0, %%eax
  625             testw $2, %%dx
  626             je 5f
  627             lodsw
  628             addl %%eax, %%ebx
  629             adcl $0, %%ebx
  630             movw $0, %%ax
  631 5:          test $1, %%edx
  632             je 6f
  633             lodsb
  634             addl %%eax, %%ebx
  635             adcl $0, %%ebx
  636 6:          movl %%ebx, %%eax
  637             shrl $16, %%eax
  638             addw %%ax, %%bx
  639             adcw $0, %%bx
  640             "
  641         : "=b"(sum)
  642         : "0"(sum), "c"(len), "S"(th)
  643         : "ax", "bx", "cx", "dx", "si" );
  644 
  645         /* We only want the bottom 16 bits, but we never cleared the top 16. */
  646   
  647         return((~sum) & 0xffff);
  648 }
 649 
 650 
 651 
 652 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 653                 unsigned long daddr, int len, struct sock *sk)
 654 {
 655         th->check = 0;
 656         th->check = tcp_check(th, len, saddr, daddr);
 657         return;
 658 }
 659 
 660 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 661 {
 662         int size;
 663         struct tcphdr * th = skb->h.th;
 664 
 665         /* length of packet (not counting length of pre-tcp headers) */
 666         size = skb->len - ((unsigned char *) th - skb->data);
 667 
 668         /* sanity check it.. */
 669         if (size < sizeof(struct tcphdr) || size > skb->len) 
 670         {
 671                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 672                         skb, skb->data, th, skb->len);
 673                 kfree_skb(skb, FREE_WRITE);
 674                 return;
 675         }
 676 
 677         /* If we have queued a header size packet.. */
 678         if (size == sizeof(struct tcphdr)) 
 679         {
 680                 /* If its got a syn or fin its notionally included in the size..*/
 681                 if(!th->syn && !th->fin) 
 682                 {
 683                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 684                         kfree_skb(skb,FREE_WRITE);
 685                         return;
 686                 }
 687         }
 688 
 689         tcp_statistics.TcpOutSegs++;  
 690 
 691         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 692         if (after(skb->h.seq, sk->window_seq) ||
 693             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 694              sk->packets_out >= sk->cong_window) 
 695         {
 696                 /* checksum will be supplied by tcp_write_xmit.  So
 697                  * we shouldn't need to set it at all.  I'm being paranoid */
 698                 th->check = 0;
 699                 if (skb->next != NULL) 
 700                 {
 701                         printk("tcp_send_partial: next != NULL\n");
 702                         skb_unlink(skb);
 703                 }
 704                 skb_queue_tail(&sk->write_queue, skb);
 705                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 706                     sk->send_head == NULL &&
 707                     sk->ack_backlog == 0)
 708                         reset_timer(sk, TIME_PROBE0, sk->rto);
 709         } 
 710         else 
 711         {
 712                 th->ack_seq = ntohl(sk->acked_seq);
 713                 th->window = ntohs(tcp_select_window(sk));
 714 
 715                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 716 
 717                 sk->sent_seq = sk->write_seq;
 718                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 719         }
 720 }
 721 
 722 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 723 {
 724         struct sk_buff * skb;
 725         unsigned long flags;
 726 
 727         save_flags(flags);
 728         cli();
 729         skb = sk->partial;
 730         if (skb) {
 731                 sk->partial = NULL;
 732                 del_timer(&sk->partial_timer);
 733         }
 734         restore_flags(flags);
 735         return skb;
 736 }
 737 
 738 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 739 {
 740         struct sk_buff *skb;
 741 
 742         if (sk == NULL)
 743                 return;
 744         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 745                 tcp_send_skb(sk, skb);
 746 }
 747 
 748 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 749 {
 750         struct sk_buff * tmp;
 751         unsigned long flags;
 752 
 753         save_flags(flags);
 754         cli();
 755         tmp = sk->partial;
 756         if (tmp)
 757                 del_timer(&sk->partial_timer);
 758         sk->partial = skb;
 759         init_timer(&sk->partial_timer);
 760         sk->partial_timer.expires = HZ;
 761         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 762         sk->partial_timer.data = (unsigned long) sk;
 763         add_timer(&sk->partial_timer);
 764         restore_flags(flags);
 765         if (tmp)
 766                 tcp_send_skb(sk, tmp);
 767 }
 768 
 769 
 770 /*
 771  *      This routine sends an ack and also updates the window. 
 772  */
 773  
 774 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 775              struct sock *sk,
 776              struct tcphdr *th, unsigned long daddr)
 777 {
 778         struct sk_buff *buff;
 779         struct tcphdr *t1;
 780         struct device *dev = NULL;
 781         int tmp;
 782 
 783         if(sk->zapped)
 784                 return;         /* We have been reset, we may not send again */
 785         /*
 786          * We need to grab some memory, and put together an ack,
 787          * and then put it into the queue to be sent.
 788          */
 789 
 790         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 791         if (buff == NULL) 
 792         {
 793                 /* Force it to send an ack. */
 794                 sk->ack_backlog++;
 795                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 796                 {
 797                         reset_timer(sk, TIME_WRITE, 10);
 798                 }
 799                 return;
 800         }
 801 
 802         buff->len = sizeof(struct tcphdr);
 803         buff->sk = sk;
 804         buff->localroute = sk->localroute;
 805         t1 =(struct tcphdr *) buff->data;
 806 
 807         /* Put in the IP header and routing stuff. */
 808         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 809                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 810         if (tmp < 0) 
 811         {
 812                 buff->free=1;
 813                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 814                 return;
 815         }
 816         buff->len += tmp;
 817         t1 =(struct tcphdr *)((char *)t1 +tmp);
 818 
 819         /* FIXME: */
 820         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 821 
 822         /*
 823          *      Swap the send and the receive. 
 824          */
 825          
 826         t1->dest = th->source;
 827         t1->source = th->dest;
 828         t1->seq = ntohl(sequence);
 829         t1->ack = 1;
 830         sk->window = tcp_select_window(sk);
 831         t1->window = ntohs(sk->window);
 832         t1->res1 = 0;
 833         t1->res2 = 0;
 834         t1->rst = 0;
 835         t1->urg = 0;
 836         t1->syn = 0;
 837         t1->psh = 0;
 838         t1->fin = 0;
 839         if (ack == sk->acked_seq) 
 840         {
 841                 sk->ack_backlog = 0;
 842                 sk->bytes_rcv = 0;
 843                 sk->ack_timed = 0;
 844                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 845                                   && sk->timeout == TIME_WRITE) 
 846                 {
 847                         if(sk->keepopen) {
 848                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 849                         } else {
 850                                 delete_timer(sk);
 851                         }
 852                 }
 853         }
 854         t1->ack_seq = ntohl(ack);
 855         t1->doff = sizeof(*t1)/4;
 856         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 857         if (sk->debug)
 858                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 859         tcp_statistics.TcpOutSegs++;
 860         sk->prot->queue_xmit(sk, dev, buff, 1);
 861 }
 862 
 863 
 864 /* 
 865  *      This routine builds a generic TCP header. 
 866  */
 867  
 868 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 869 {
 870 
 871         /* FIXME: want to get rid of this. */
 872         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 873         th->seq = htonl(sk->write_seq);
 874         th->psh =(push == 0) ? 1 : 0;
 875         th->doff = sizeof(*th)/4;
 876         th->ack = 1;
 877         th->fin = 0;
 878         sk->ack_backlog = 0;
 879         sk->bytes_rcv = 0;
 880         sk->ack_timed = 0;
 881         th->ack_seq = htonl(sk->acked_seq);
 882         sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 883         th->window = htons(sk->window);
 884 
 885         return(sizeof(*th));
 886 }
 887 
 888 /*
 889  *      This routine copies from a user buffer into a socket,
 890  *      and starts the transmit system.
      *
      *      sk       - socket to send on.  It is marked busy (sk->inuse = 1)
      *                 on entry and release_sock() is called before every
      *                 return.
      *      from     - source buffer; read with memcpy_fromfs().
      *      len      - number of bytes the caller wants to send.
      *      nonblock - when non-zero, return instead of sleeping while
      *                 waiting for the connection or for buffer memory.
      *      flags    - MSG_OOB and MSG_DONTROUTE are examined here.
      *
      *      Returns the number of bytes copied when any were copied.
      *      Otherwise returns a negative error: -sk->err (which is then
      *      cleared), -EPIPE, -EAGAIN, -ERESTARTSYS, or the negative
      *      result of prot->build_header().  Returns 0 if len <= 0.
 891  */
 892 
 893 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 894           int len, int nonblock, unsigned flags)
 895 {
 896         int copied = 0;
 897         int copy;
 898         int tmp;
 899         struct sk_buff *skb;
 900         struct sk_buff *send_tmp;
 901         unsigned char *buff;
 902         struct proto *prot;
 903         struct device *dev = NULL;
 904 
             /* Mark the socket busy for the duration of the write. */
 905         sk->inuse=1;
 906         prot = sk->prot;
 907         while(len > 0) 
 908         {
 909                 if (sk->err) 
 910                 {                       /* Stop on an error */
 911                         release_sock(sk);
 912                         if (copied) 
 913                                 return(copied);
 914                         tmp = -sk->err;
 915                         sk->err = 0;
 916                         return(tmp);
 917                 }
 918 
 919         /*
 920          *      First thing we do is make sure that we are established. 
 921          */
 922         
 923                 if (sk->shutdown & SEND_SHUTDOWN) 
 924                 {
 925                         release_sock(sk);
                             /*
                              * EPIPE stays pending in sk->err only when a
                              * partial count is returned; otherwise it is
                              * cleared and reported directly.
                              */
 926                         sk->err = EPIPE;
 927                         if (copied) 
 928                                 return(copied);
 929                         sk->err = 0;
 930                         return(-EPIPE);
 931                 }
 932 
 933 
 934         /* 
 935          *      Wait for a connection to finish.
 936          */
 937         
 938                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 939                 {
 940                         if (sk->err) 
 941                         {
 942                                 release_sock(sk);
 943                                 if (copied) 
 944                                         return(copied);
 945                                 tmp = -sk->err;
 946                                 sk->err = 0;
 947                                 return(tmp);
 948                         }
 949 
                             /*
                              * Any state other than SYN_SENT/SYN_RECV is
                              * not a connection in progress: give up.
                              */
 950                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 951                         {
 952                                 release_sock(sk);
 953                                 if (copied) 
 954                                         return(copied);
 955 
 956                                 if (sk->err) 
 957                                 {
 958                                         tmp = -sk->err;
 959                                         sk->err = 0;
 960                                         return(tmp);
 961                                 }
 962 
                                     /* SIGPIPE is raised only when sk->keepopen is set. */
 963                                 if (sk->keepopen) 
 964                                 {
 965                                         send_sig(SIGPIPE, current, 0);
 966                                 }
 967                                 return(-EPIPE);
 968                         }
 969 
 970                         if (nonblock || copied) 
 971                         {
 972                                 release_sock(sk);
 973                                 if (copied) 
 974                                         return(copied);
 975                                 return(-EAGAIN);
 976                         }
 977 
                             /*
                              * Drop the socket, then re-test the state with
                              * interrupts off before sleeping.
                              * NOTE(review): presumably cli() is here so a
                              * state change cannot slip in between the test
                              * and the sleep -- confirm.
                              */
 978                         release_sock(sk);
 979                         cli();
 980                 
 981                         if (sk->state != TCP_ESTABLISHED &&
 982                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 983                         {
 984                                 interruptible_sleep_on(sk->sleep);
 985                                 if (current->signal & ~current->blocked) 
 986                                 {
 987                                         sti();
 988                                         if (copied) 
 989                                                 return(copied);
 990                                         return(-ERESTARTSYS);
 991                                 }
 992                         }
 993                         sk->inuse = 1;
 994                         sti();
 995                 }
 996 
 997         /*
 998          * The following code can result in copy <= 0 if sk->mss is ever
 999          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
1000          * sk->mtu is constant once SYN processing is finished.  I.e. we
1001          * had better not get here until we've seen his SYN and at least one
1002          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
1003          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
1004          * non-decreasing.  Note that any ioctl to set user_mss must be done
1005          * before the exchange of SYN's.  If the initial ack from the other
1006          * end has a window of 0, max_window and thus mss will both be 0.
1007          */
1008 
1009         /* 
1010          *      Now we need to check if we have a half built packet. 
1011          */
1012 
1013                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
1014                 {
1015                         int hdrlen;
1016 
1017                          /* IP header + TCP header */
                              /*
                               * i.e. the offset of the TCP header within
                               * skb->data plus the size of the TCP header.
                               */
1018                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
1019                                  + sizeof(struct tcphdr);
1020         
1021                         /* Add more stuff to the end of skb->len */
1022                         if (!(flags & MSG_OOB)) 
1023                         {
                                     /* Room left in this segment (up to sk->mss), capped at len. */
1024                                 copy = min(sk->mss - (skb->len - hdrlen), len);
1025                                 /* FIXME: this is really a bug. */
1026                                 if (copy <= 0) 
1027                                 {
1028                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
1029                                         copy = 0;
1030                                 }
1031           
1032                                 memcpy_fromfs(skb->data + skb->len, from, copy);
1033                                 skb->len += copy;
1034                                 from += copy;
1035                                 copied += copy;
1036                                 len -= copy;
1037                                 sk->write_seq += copy;
1038                         }
                             /*
                              * Send now if the segment is full, this is an
                              * OOB write, or nothing is in flight; otherwise
                              * park it again as the partial packet.
                              */
1039                         if ((skb->len - hdrlen) >= sk->mss ||
1040                                 (flags & MSG_OOB) || !sk->packets_out)
1041                                 tcp_send_skb(sk, skb);
1042                         else
1043                                 tcp_enqueue_partial(skb, sk);
1044                         continue;
1045                 }
1046 
1047         /*
1048          * We also need to worry about the window.
1049          * If window < 1/2 the maximum window we've seen from this
1050          *   host, don't use it.  This is sender side
1051          *   silly window prevention, as specified in RFC1122.
1052          *   (Note that this is different than earlier versions of
1053          *   SWS prevention, e.g. RFC813.).  What we actually do is 
1054          *   use the whole MSS.  Since this results in the right
1055          *   edge of the packet being outside the window, it will
1056          *   be queued for later rather than sent.
1057          */
1058 
1059                 copy = sk->window_seq - sk->write_seq;
1060                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
1061                         copy = sk->mss;
1062                 if (copy > len)
1063                         copy = len;
1064 
1065         /*
1066          *      We should really check the window here also. 
1067          */
1068          
                     /*
                      * A short, non-OOB segment gets a buffer sized from
                      * sk->mtu so more data can be appended later; send_tmp
                      * remembers it as a candidate for the partial queue.
                      * Otherwise the buffer is sized for exactly this copy.
                      */
1069                 send_tmp = NULL;
1070                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1071                 {
1072                         /*
1073                          *      We will release the socket in case we sleep here. 
1074                          */
1075                         release_sock(sk);
1076                         /*
1077                          *      NB: following must be mtu, because mss can be increased.
1078                          *      mss is always <= mtu 
1079                          */
1080                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1081                         sk->inuse = 1;
1082                         send_tmp = skb;
1083                 } 
1084                 else 
1085                 {
1086                         /*
1087                          *      We will release the socket in case we sleep here. 
1088                          */
1089                         release_sock(sk);
1090                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1091                         sk->inuse = 1;
1092                 }
1093 
1094                 /*
1095                  *      If we didn't get any memory, we need to sleep. 
1096                  */
1097 
1098                 if (skb == NULL) 
1099                 {
1100                         if (nonblock /* || copied */) 
1101                         {
1102                                 release_sock(sk);
1103                                 if (copied) 
1104                                         return(copied);
1105                                 return(-EAGAIN);
1106                         }
1107 
1108                         /*
1109                          *      FIXME: here is another race condition. 
1110                          */
1111 
                             /*
                              * Snapshot wmem_alloc before releasing the socket;
                              * the sleep below is skipped if it has dropped
                              * by the time interrupts are off.
                              */
1112                         tmp = sk->wmem_alloc;
1113                         release_sock(sk);
1114                         cli();
1115                         /*
1116                          *      Again we will try to avoid it. 
1117                          */
1118                         if (tmp <= sk->wmem_alloc &&
1119                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1120                                 && sk->err == 0) 
1121                         {
1122                                 interruptible_sleep_on(sk->sleep);
1123                                 if (current->signal & ~current->blocked) 
1124                                 {
1125                                         sti();
1126                                         if (copied) 
1127                                                 return(copied);
1128                                         return(-ERESTARTSYS);
1129                                 }
1130                         }
1131                         sk->inuse = 1;
1132                         sti();
1133                         continue;
1134                 }
1135 
1136                 skb->len = 0;
1137                 skb->sk = sk;
1138                 skb->free = 0;
1139                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1140         
1141                 buff = skb->data;
1142         
1143                 /*
1144                  * FIXME: we need to optimize this.
1145                  * Perhaps some hints here would be good.
1146                  */
1147                 
1148                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1149                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1150                 if (tmp < 0 ) 
1151                 {
1152                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1153                         release_sock(sk);
1154                         if (copied) 
1155                                 return(copied);
1156                         return(tmp);
1157                 }
1158                 skb->len += tmp;
1159                 skb->dev = dev;
1160                 buff += tmp;
1161                 skb->h.th =(struct tcphdr *) buff;
                     /*
                      * The last argument is the data still left after this
                      * segment; tcp_build_header() sets PSH when it is zero.
                      */
1162                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
                     /*
                      * tcp_build_header() as defined in this file always
                      * returns sizeof(struct tcphdr), so this branch is not
                      * expected to be taken.
                      */
1163                 if (tmp < 0) 
1164                 {
1165                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1166                         release_sock(sk);
1167                         if (copied) 
1168                                 return(copied);
1169                         return(tmp);
1170                 }
1171 
1172                 if (flags & MSG_OOB) 
1173                 {
1174                         ((struct tcphdr *)buff)->urg = 1;
                             /*
                              * NOTE(review): ntohs() is used here for a value
                              * going into the header; htons() would state
                              * the intent.
                              */
1175                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1176                 }
1177                 skb->len += tmp;
1178                 memcpy_fromfs(buff+tmp, from, copy);
1179 
1180                 from += copy;
1181                 copied += copy;
1182                 len -= copy;
1183                 skb->len += copy;
1184                 skb->free = 0;
1185                 sk->write_seq += copy;
1186         
                     /*
                      * A short segment is held back as the partial packet
                      * while other packets are outstanding; otherwise the
                      * segment goes out immediately.
                      */
1187                 if (send_tmp != NULL && sk->packets_out) 
1188                 {
1189                         tcp_enqueue_partial(send_tmp, sk);
1190                         continue;
1191                 }
1192                 tcp_send_skb(sk, skb);
1193         }
1194         sk->err = 0;
1195 
1196 /*
1197  *      Nagle's rule. Turn Nagle off with TCP_NODELAY for highly
1198  *      interactive fast network servers. It's meant to be on and
1199  *      it really improves the throughput though not the echo time
1200  *      on my slow slip link - Alan
1201  */
1202 
1203 /*
1204  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1205  */
1206  
1207         if(sk->partial && ((!sk->packets_out) 
1208      /* If not nagling we can send on the before case too.. */
1209               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1210         ))
1211                 tcp_send_partial(sk);
1212 
1213         release_sock(sk);
1214         return(copied);
1215 }
1216 
1217 
1218 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1219            int len, int nonblock, unsigned flags,
1220            struct sockaddr_in *addr, int addr_len)
1221 {
1222         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1223                 return -EINVAL;
1224         if (addr_len < sizeof(*addr)) 
1225                 return(-EINVAL);
1226         if (addr->sin_family && addr->sin_family != AF_INET) 
1227                 return(-EINVAL);
1228         if (addr->sin_port != sk->dummy_th.dest) 
1229                 return(-EISCONN);
1230         if (addr->sin_addr.s_addr != sk->daddr) 
1231                 return(-EISCONN);
1232         return(tcp_write(sk, from, len, nonblock, flags));
1233 }
1234 
1235 
1236 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1237 {
1238         int tmp;
1239         struct device *dev = NULL;
1240         struct tcphdr *t1;
1241         struct sk_buff *buff;
1242 
1243         if (!sk->ack_backlog) 
1244                 return;
1245 
1246         /*
1247          * FIXME: we need to put code here to prevent this routine from
1248          * being called.  Being called once in a while is ok, so only check
1249          * if this is the second time in a row.
1250          */
1251 
1252         /*
1253          * We need to grab some memory, and put together an ack,
1254          * and then put it into the queue to be sent.
1255          */
1256 
1257         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1258         if (buff == NULL) 
1259         {
1260                 /* Try again real soon. */
1261                 reset_timer(sk, TIME_WRITE, 10);
1262                 return;
1263         }
1264 
1265         buff->len = sizeof(struct tcphdr);
1266         buff->sk = sk;
1267         buff->localroute = sk->localroute;
1268         
1269         /*
1270          *      Put in the IP header and routing stuff. 
1271          */
1272 
1273         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1274                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1275         if (tmp < 0) 
1276         {
1277                 buff->free=1;
1278                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1279                 return;
1280         }
1281 
1282         buff->len += tmp;
1283         t1 =(struct tcphdr *)(buff->data +tmp);
1284 
1285         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1286         t1->seq = htonl(sk->sent_seq);
1287         t1->ack = 1;
1288         t1->res1 = 0;
1289         t1->res2 = 0;
1290         t1->rst = 0;
1291         t1->urg = 0;
1292         t1->syn = 0;
1293         t1->psh = 0;
1294         sk->ack_backlog = 0;
1295         sk->bytes_rcv = 0;
1296         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1297         t1->window = ntohs(sk->window);
1298         t1->ack_seq = ntohl(sk->acked_seq);
1299         t1->doff = sizeof(*t1)/4;
1300         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1301         sk->prot->queue_xmit(sk, dev, buff, 1);
1302         tcp_statistics.TcpOutSegs++;
1303 }
1304 
1305 
1306 /*
1307  *      FIXME:
1308  *      This routine frees used buffers.
1309  *      It should consider sending an ACK to let the
1310  *      other end know we now have a bigger window.
1311  */
1312 
1313 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1314 {
1315         unsigned long flags;
1316         unsigned long left;
1317         struct sk_buff *skb;
1318         unsigned long rspace;
1319 
1320         if(sk->debug)
1321                 printk("cleaning rbuf for sk=%p\n", sk);
1322   
1323         save_flags(flags);
1324         cli();
1325   
1326         left = sk->prot->rspace(sk);
1327  
1328         /*
1329          * We have to loop through all the buffer headers,
1330          * and try to free up all the space we can.
1331          */
1332 
1333         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1334         {
1335                 if (!skb->used) 
1336                         break;
1337                 skb_unlink(skb);
1338                 skb->sk = sk;
1339                 kfree_skb(skb, FREE_READ);
1340         }
1341 
1342         restore_flags(flags);
1343 
1344         /*
1345          * FIXME:
1346          * At this point we should send an ack if the difference
1347          * in the window, and the amount of space is bigger than
1348          * TCP_WINDOW_DIFF.
1349          */
1350 
1351         if(sk->debug)
1352                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1353                                             left);
1354         if ((rspace=sk->prot->rspace(sk)) != left) 
1355         {
1356                 /*
1357                  * This area has caused the most trouble.  The current strategy
1358                  * is to simply do nothing if the other end has room to send at
1359                  * least 3 full packets, because the ack from those will auto-
1360                  * matically update the window.  If the other end doesn't think
1361                  * we have much space left, but we have room for at least 1 more
1362                  * complete packet than it thinks we do, we will send an ack
1363                  * immediately.  Otherwise we will wait up to .5 seconds in case
1364                  * the user reads some more.
1365                  */
1366                 sk->ack_backlog++;
1367         /*
1368          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1369          * if the other end is offering a window smaller than the agreed on MSS
1370          * (called sk->mtu here).  In theory there's no connection between send
1371          * and receive, and so no reason to think that they're going to send
1372          * small packets.  For the moment I'm using the hack of reducing the mss
1373          * only on the send side, so I'm putting mtu here.
1374          */
1375 
1376                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1377                 {
1378                         /* Send an ack right now. */
1379                         tcp_read_wakeup(sk);
1380                 } 
1381                 else 
1382                 {
1383                         /* Force it to send an ack soon. */
1384                         int was_active = del_timer(&sk->timer);
1385                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1386                         {
1387                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1388                         } 
1389                         else
1390                                 add_timer(&sk->timer);
1391                 }
1392         }
1393 } 
1394 
1395 
1396 /*
1397  *      Handle reading urgent data. 
      *
      *      sk       - socket to read the urgent byte from.
      *      nonblock - when non-zero, return -EAGAIN instead of sleeping.
      *      to       - destination; written with put_fs_byte().
      *      len      - requested length; nothing is done when len <= 0.
      *      flags    - with MSG_PEEK the urgent byte is left unread.
      *
      *      Returns 1 when the urgent byte was delivered.  Returns
      *      -EINVAL when urgent data is delivered inline, there is none,
      *      or it has already been read.  Returns -sk->err (clearing
      *      it) on a pending error.  Returns 0 once and -ENOTCONN
      *      afterwards on a closed socket, 0 after RCV_SHUTDOWN,
      *      -EAGAIN when non-blocking, and -ERESTARTSYS when a signal
      *      is pending.
1398  */
1399  
1400 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1401              unsigned char *to, int len, unsigned flags)
1402 {
1403         struct wait_queue wait = { current, NULL };
1404 
1405         while (len > 0) 
1406         {
1407                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1408                         return -EINVAL;
1409                 if (sk->urg_data & URG_VALID) 
1410                 {
                             /*
                              * The conversion to char keeps only the low
                              * bits of urg_data.
                              * NOTE(review): presumably the URG_* flag bits
                              * sit above the data byte -- confirm against
                              * their definitions.
                              */
1411                         char c = sk->urg_data;
                             /* A peek leaves the byte available for a later read. */
1412                         if (!(flags & MSG_PEEK))
1413                                 sk->urg_data = URG_READ;
1414                         put_fs_byte(c, to);
1415                         return 1;
1416                 }
1417 
1418                 if (sk->err) 
1419                 {
1420                         int tmp = -sk->err;
1421                         sk->err = 0;
1422                         return tmp;
1423                 }
1424 
                     /*
                      * The first read on a closed socket reports end of
                      * data (0) and sets sk->done; later reads get -ENOTCONN.
                      */
1425                 if (sk->state == TCP_CLOSE || sk->done) 
1426                 {
1427                         if (!sk->done) {
1428                                 sk->done = 1;
1429                                 return 0;
1430                         }
1431                         return -ENOTCONN;
1432                 }
1433 
1434                 if (sk->shutdown & RCV_SHUTDOWN) 
1435                 {
1436                         sk->done = 1;
1437                         return 0;
1438                 }
1439 
1440                 if (nonblock)
1441                         return -EAGAIN;
1442 
1443                 if (current->signal & ~current->blocked)
1444                         return -ERESTARTSYS;
1445 
                     /*
                      * Go on the wait queue first, then re-test the
                      * condition, and only give up the CPU if the urgent
                      * byte is still announced but not yet here and
                      * nothing else has happened.
                      */
1446                 current->state = TASK_INTERRUPTIBLE;
1447                 add_wait_queue(sk->sleep, &wait);
1448                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1449                     !(sk->shutdown & RCV_SHUTDOWN))
1450                         schedule();
1451                 remove_wait_queue(sk->sleep, &wait);
1452                 current->state = TASK_RUNNING;
1453         }
1454         return 0;
1455 }
1456 
1457 
1458 /*
1459  *      This routine copies from a sock struct into the user buffer. 
1460  */
1461  
1462 static int tcp_read(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1463         int len, int nonblock, unsigned flags)
1464 {
1465         struct wait_queue wait = { current, NULL };
1466         int copied = 0;
1467         unsigned long peek_seq;
1468         unsigned long *seq;
1469         unsigned long used;
1470 
1471         /* This error should be checked. */
1472         if (sk->state == TCP_LISTEN)
1473                 return -ENOTCONN;
1474 
1475         /* Urgent data needs to be handled specially. */
1476         if (flags & MSG_OOB)
1477                 return tcp_read_urg(sk, nonblock, to, len, flags);
1478 
1479         peek_seq = sk->copied_seq;
1480         seq = &sk->copied_seq;
1481         if (flags & MSG_PEEK)
1482                 seq = &peek_seq;
1483 
1484         add_wait_queue(sk->sleep, &wait);
1485         sk->inuse = 1;
1486         while (len > 0) 
1487         {
1488                 struct sk_buff * skb;
1489                 unsigned long offset;
1490         
1491                 /*
1492                  * are we at urgent data? Stop if we have read anything.
1493                  */
1494                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1495                         break;
1496 
1497                 current->state = TASK_INTERRUPTIBLE;
1498 
1499                 skb = skb_peek(&sk->receive_queue);
1500                 do 
1501                 {
1502                         if (!skb)
1503                                 break;
1504                         if (before(1+*seq, skb->h.th->seq))
1505                                 break;
1506                         offset = 1 + *seq - skb->h.th->seq;
1507                         if (skb->h.th->syn)
1508                                 offset--;
1509                         if (offset < skb->len)
1510                                 goto found_ok_skb;
1511                         if (!(flags & MSG_PEEK))
1512                                 skb->used = 1;
1513                         skb = skb->next;
1514                 }
1515                 while (skb != (struct sk_buff *)&sk->receive_queue);
1516 
1517                 if (copied)
1518                         break;
1519 
1520                 if (sk->err) 
1521                 {
1522                         copied = -sk->err;
1523                         sk->err = 0;
1524                         break;
1525                 }
1526 
1527                 if (sk->state == TCP_CLOSE) 
1528                 {
1529                         if (!sk->done) 
1530                         {
1531                                 sk->done = 1;
1532                                 break;
1533                         }
1534                         copied = -ENOTCONN;
1535                         break;
1536                 }
1537 
1538                 if (sk->shutdown & RCV_SHUTDOWN) 
1539                 {
1540                         sk->done = 1;
1541                         break;
1542                 }
1543                         
1544                 if (nonblock) 
1545                 {
1546                         copied = -EAGAIN;
1547                         break;
1548                 }
1549 
1550                 cleanup_rbuf(sk);
1551                 release_sock(sk);
1552                 schedule();
1553                 sk->inuse = 1;
1554 
1555                 if (current->signal & ~current->blocked) 
1556                 {
1557                         copied = -ERESTARTSYS;
1558                         break;
1559                 }
1560                 continue;
1561 
1562         found_ok_skb:
1563                 /* Ok so how much can we use ? */
1564                 used = skb->len - offset;
1565                 if (len < used)
1566                         used = len;
1567                 /* do we have urgent data here? */
1568                 if (sk->urg_data) 
1569                 {
1570                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1571                         if (urg_offset < used) 
1572                         {
1573                                 if (!urg_offset) 
1574                                 {
1575                                         if (!sk->urginline) 
1576                                         {
1577                                                 ++*seq;
1578                                                 offset++;
1579                                                 used--;
1580                                         }
1581                                 }
1582                                 else
1583                                         used = urg_offset;
1584                         }
1585                 }
1586                 /* Copy it */
1587                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1588                         skb->h.th->doff*4 + offset, used);
1589                 copied += used;
1590                 len -= used;
1591                 to += used;
1592                 *seq += used;
1593                 if (after(sk->copied_seq+1,sk->urg_seq))
1594                         sk->urg_data = 0;
1595                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1596                         skb->used = 1;
1597         }
1598         remove_wait_queue(sk->sleep, &wait);
1599         current->state = TASK_RUNNING;
1600 
1601         /* Clean up data we have read: This will do ACK frames */
1602         cleanup_rbuf(sk);
1603         release_sock(sk);
1604         return copied;
1605 }
1606 
1607  
1608 /*
1609  *      Shutdown the sending side of a connection.
1610  */
1611 
1612 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1613 {
1614         struct sk_buff *buff;
1615         struct tcphdr *t1, *th;
1616         struct proto *prot;
1617         int tmp;
1618         struct device *dev = NULL;
1619 
1620         /*
1621          * We need to grab some memory, and put together a FIN,
1622          * and then put it into the queue to be sent.
1623          * FIXME:
1624          *
1625          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1626          *      Most of this is guesswork, so maybe it will work...
1627          */
1628 
1629         if (!(how & SEND_SHUTDOWN)) 
1630                 return;
1631          
1632         /*
1633          *      If we've already sent a FIN, return. 
1634          */
1635          
1636         if (sk->state == TCP_FIN_WAIT1 ||
1637             sk->state == TCP_FIN_WAIT2 ||
1638             sk->state == TCP_CLOSING ||
1639             sk->state == TCP_LAST_ACK ||
1640             sk->state == TCP_TIME_WAIT
1641         ) 
1642         {
1643                 return;
1644         }
1645         sk->inuse = 1;
1646 
1647         /*
1648          * flag that the sender has shutdown
1649          */
1650 
1651         sk->shutdown |= SEND_SHUTDOWN;
1652 
1653         /*
1654          *  Clear out any half completed packets. 
1655          */
1656 
1657         if (sk->partial)
1658                 tcp_send_partial(sk);
1659 
1660         prot =(struct proto *)sk->prot;
1661         th =(struct tcphdr *)&sk->dummy_th;
1662         release_sock(sk); /* incase the malloc sleeps. */
1663         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1664         if (buff == NULL)
1665                 return;
1666         sk->inuse = 1;
1667 
1668         buff->sk = sk;
1669         buff->len = sizeof(*t1);
1670         buff->localroute = sk->localroute;
1671         t1 =(struct tcphdr *) buff->data;
1672 
1673         /*
1674          *      Put in the IP header and routing stuff. 
1675          */
1676 
1677         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1678                            IPPROTO_TCP, sk->opt,
1679                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1680         if (tmp < 0) 
1681         {
1682                 /*
1683                  *      Finish anyway, treat this as a send that got lost. 
1684                  *
1685                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1686                  *      written data to be completely acknowledged along
1687                  *      with an acknowledge to our FIN.
1688                  *
1689                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1690                  *      connection established.
1691                  */
1692                 buff->free=1;
1693                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1694 
1695                 if (sk->state == TCP_ESTABLISHED)
1696                         tcp_set_state(sk,TCP_FIN_WAIT1);
1697                 else if(sk->state == TCP_CLOSE_WAIT)
1698                         tcp_set_state(sk,TCP_LAST_ACK);
1699                 else
1700                         tcp_set_state(sk,TCP_FIN_WAIT2);
1701 
1702                 release_sock(sk);
1703                 return;
1704         }
1705 
1706         t1 =(struct tcphdr *)((char *)t1 +tmp);
1707         buff->len += tmp;
1708         buff->dev = dev;
1709         memcpy(t1, th, sizeof(*t1));
1710         t1->seq = ntohl(sk->write_seq);
1711         sk->write_seq++;
1712         buff->h.seq = sk->write_seq;
1713         t1->ack = 1;
1714         t1->ack_seq = ntohl(sk->acked_seq);
1715         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1716         t1->fin = 1;
1717         t1->rst = 0;
1718         t1->doff = sizeof(*t1)/4;
1719         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1720 
1721         /*
1722          * If there is data in the write queue, the fin must be appended to
1723          * the write queue.
1724          */
1725         
1726         if (skb_peek(&sk->write_queue) != NULL) 
1727         {
1728                 buff->free=0;
1729                 if (buff->next != NULL) 
1730                 {
1731                         printk("tcp_shutdown: next != NULL\n");
1732                         skb_unlink(buff);
1733                 }
1734                 skb_queue_tail(&sk->write_queue, buff);
1735         } 
1736         else 
1737         {
1738                 sk->sent_seq = sk->write_seq;
1739                 sk->prot->queue_xmit(sk, dev, buff, 0);
1740         }
1741 
1742         if (sk->state == TCP_ESTABLISHED) 
1743                 tcp_set_state(sk,TCP_FIN_WAIT1);
1744         else if (sk->state == TCP_CLOSE_WAIT)
1745                 tcp_set_state(sk,TCP_LAST_ACK);
1746         else
1747                 tcp_set_state(sk,TCP_FIN_WAIT2);
1748 
1749         release_sock(sk);
1750 }
1751 
1752 
1753 static int
1754 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1755              int to_len, int nonblock, unsigned flags,
1756              struct sockaddr_in *addr, int *addr_len)
1757 {
1758         int result;
1759   
1760         /* 
1761          *      Have to check these first unlike the old code. If 
1762          *      we check them after we lose data on an error
1763          *      which is wrong 
1764          */
1765 
1766         if(addr_len)
1767                 *addr_len = sizeof(*addr);
1768         result=tcp_read(sk, to, to_len, nonblock, flags);
1769 
1770         if (result < 0) 
1771                 return(result);
1772   
1773         if(addr)
1774         {
1775                 addr->sin_family = AF_INET;
1776                 addr->sin_port = sk->dummy_th.dest;
1777                 addr->sin_addr.s_addr = sk->daddr;
1778         }
1779         return(result);
1780 }
1781 
1782 
1783 /*
1784  *      This routine will send an RST to the other tcp. 
1785  */
1786  
1787 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1788           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1789 {
1790         struct sk_buff *buff;
1791         struct tcphdr *t1;
1792         int tmp;
1793         struct device *ndev=NULL;
1794   
1795 /*
1796  * We need to grab some memory, and put together an RST,
1797  * and then put it into the queue to be sent.
1798  */
1799 
1800         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1801         if (buff == NULL) 
1802                 return;
1803 
1804         buff->len = sizeof(*t1);
1805         buff->sk = NULL;
1806         buff->dev = dev;
1807         buff->localroute = 0;
1808 
1809         t1 =(struct tcphdr *) buff->data;
1810 
1811         /*
1812          *      Put in the IP header and routing stuff. 
1813          */
1814 
1815         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1816                            sizeof(struct tcphdr),tos,ttl);
1817         if (tmp < 0) 
1818         {
1819                 buff->free = 1;
1820                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1821                 return;
1822         }
1823 
1824         t1 =(struct tcphdr *)((char *)t1 +tmp);
1825         buff->len += tmp;
1826         memcpy(t1, th, sizeof(*t1));
1827 
1828         /*
1829          *      Swap the send and the receive. 
1830          */
1831 
1832         t1->dest = th->source;
1833         t1->source = th->dest;
1834         t1->rst = 1;  
1835         t1->window = 0;
1836   
1837         if(th->ack)
1838         {
1839                 t1->ack = 0;
1840                 t1->seq = th->ack_seq;
1841                 t1->ack_seq = 0;
1842         }
1843         else
1844         {
1845                 t1->ack = 1;
1846                 if(!th->syn)
1847                         t1->ack_seq=htonl(th->seq);
1848                 else
1849                         t1->ack_seq=htonl(th->seq+1);
1850                 t1->seq=0;
1851         }
1852 
1853         t1->syn = 0;
1854         t1->urg = 0;
1855         t1->fin = 0;
1856         t1->psh = 0;
1857         t1->doff = sizeof(*t1)/4;
1858         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1859         prot->queue_xmit(NULL, dev, buff, 1);
1860         tcp_statistics.TcpOutSegs++;
1861 }
1862 
1863 
1864 /*
1865  *      Look for tcp options. Parses everything but only knows about MSS.
1866  *      This routine is always called with the packet containing the SYN.
1867  *      However it may also be called with the ack to the SYN.  So you
1868  *      can't assume this is always the SYN.  It's always called after
1869  *      we have set up sk->mtu to our own MTU.
1870  */
1871  
1872 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1873 {
1874         unsigned char *ptr;
1875         int length=(th->doff*4)-sizeof(struct tcphdr);
1876         int mss_seen = 0;
1877     
1878         ptr = (unsigned char *)(th + 1);
1879   
1880         while(length>0)
1881         {
1882                 int opcode=*ptr++;
1883                 int opsize=*ptr++;
1884                 switch(opcode)
1885                 {
1886                         case TCPOPT_EOL:
1887                                 return;
1888                         case TCPOPT_NOP:
1889                                 length-=2;
1890                                 continue;
1891                         
1892                         default:
1893                                 if(opsize<=2)   /* Avoid silly options looping forever */
1894                                         return;
1895                                 switch(opcode)
1896                                 {
1897                                         case TCPOPT_MSS:
1898                                                 if(opsize==4 && th->syn)
1899                                                 {
1900                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1901                                                         mss_seen = 1;
1902                                                 }
1903                                                 break;
1904                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1905                                 }
1906                                 ptr+=opsize-2;
1907                                 length-=opsize;
1908                 }
1909         }
1910         if (th->syn) 
1911         {
1912                 if (! mss_seen)
1913                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1914         }
1915 #ifdef CONFIG_INET_PCTCP
1916         sk->mss = min(sk->max_window >> 1, sk->mtu);
1917 #else    
1918         sk->mss = min(sk->max_window, sk->mtu);
1919 #endif  
1920 }
1921 
/*
 *	Classful netmask for an address.
 *
 *	dst is an IP address in network byte order; the mask is returned in
 *	network byte order as well.  Class A and class B addresses get their
 *	own mask, every other address gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
	unsigned long host_addr = ntohl(dst);
	unsigned long mask;

	if (IN_CLASSA(host_addr))
		mask = IN_CLASSA_NET;
	else if (IN_CLASSB(host_addr))
		mask = IN_CLASSB_NET;
	else
		mask = IN_CLASSC_NET;

	return htonl(mask);
}
1931 
1932 /*
1933  *      This routine handles a connection request.
1934  *      It should make sure we haven't already responded.
1935  *      Because of the way BSD works, we have to send a syn/ack now.
1936  *      This also means it will be harder to close a socket which is
1937  *      listening.
1938  */
1939  
1940 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1941                  unsigned long daddr, unsigned long saddr,
1942                  struct options *opt, struct device *dev)
1943 {
1944         struct sk_buff *buff;
1945         struct tcphdr *t1;
1946         unsigned char *ptr;
1947         struct sock *newsk;
1948         struct tcphdr *th;
1949         struct device *ndev=NULL;
1950         int tmp;
1951         struct rtable *rt;
1952   
1953         th = skb->h.th;
1954 
1955         /* If the socket is dead, don't accept the connection. */
1956         if (!sk->dead) 
1957         {
1958                 sk->data_ready(sk,0);
1959         }
1960         else 
1961         {
1962                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1963                 tcp_statistics.TcpAttemptFails++;
1964                 kfree_skb(skb, FREE_READ);
1965                 return;
1966         }
1967 
1968         /*
1969          * Make sure we can accept more.  This will prevent a
1970          * flurry of syns from eating up all our memory.
1971          */
1972 
1973         if (sk->ack_backlog >= sk->max_ack_backlog) 
1974         {
1975                 tcp_statistics.TcpAttemptFails++;
1976                 kfree_skb(skb, FREE_READ);
1977                 return;
1978         }
1979 
1980         /*
1981          * We need to build a new sock struct.
1982          * It is sort of bad to have a socket without an inode attached
1983          * to it, but the wake_up's will just wake up the listening socket,
1984          * and if the listening socket is destroyed before this is taken
1985          * off of the queue, this will take care of it.
1986          */
1987 
1988         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1989         if (newsk == NULL) 
1990         {
1991                 /* just ignore the syn.  It will get retransmitted. */
1992                 tcp_statistics.TcpAttemptFails++;
1993                 kfree_skb(skb, FREE_READ);
1994                 return;
1995         }
1996 
1997         memcpy(newsk, sk, sizeof(*newsk));
1998         skb_queue_head_init(&newsk->write_queue);
1999         skb_queue_head_init(&newsk->receive_queue);
2000         newsk->send_head = NULL;
2001         newsk->send_tail = NULL;
2002         skb_queue_head_init(&newsk->back_log);
2003         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
2004         newsk->rto = TCP_TIMEOUT_INIT;
2005         newsk->mdev = 0;
2006         newsk->max_window = 0;
2007         newsk->cong_window = 1;
2008         newsk->cong_count = 0;
2009         newsk->ssthresh = 0;
2010         newsk->backoff = 0;
2011         newsk->blog = 0;
2012         newsk->intr = 0;
2013         newsk->proc = 0;
2014         newsk->done = 0;
2015         newsk->partial = NULL;
2016         newsk->pair = NULL;
2017         newsk->wmem_alloc = 0;
2018         newsk->rmem_alloc = 0;
2019         newsk->localroute = sk->localroute;
2020 
2021         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
2022 
2023         newsk->err = 0;
2024         newsk->shutdown = 0;
2025         newsk->ack_backlog = 0;
2026         newsk->acked_seq = skb->h.th->seq+1;
2027         newsk->fin_seq = skb->h.th->seq;
2028         newsk->copied_seq = skb->h.th->seq;
2029         newsk->state = TCP_SYN_RECV;
2030         newsk->timeout = 0;
2031         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
2032         newsk->window_seq = newsk->write_seq;
2033         newsk->rcv_ack_seq = newsk->write_seq;
2034         newsk->urg_data = 0;
2035         newsk->retransmits = 0;
2036         newsk->destroy = 0;
2037         init_timer(&newsk->timer);
2038         newsk->timer.data = (unsigned long)newsk;
2039         newsk->timer.function = &net_timer;
2040         newsk->dummy_th.source = skb->h.th->dest;
2041         newsk->dummy_th.dest = skb->h.th->source;
2042         
2043         /*
2044          *      Swap these two, they are from our point of view. 
2045          */
2046          
2047         newsk->daddr = saddr;
2048         newsk->saddr = daddr;
2049 
2050         put_sock(newsk->num,newsk);
2051         newsk->dummy_th.res1 = 0;
2052         newsk->dummy_th.doff = 6;
2053         newsk->dummy_th.fin = 0;
2054         newsk->dummy_th.syn = 0;
2055         newsk->dummy_th.rst = 0;        
2056         newsk->dummy_th.psh = 0;
2057         newsk->dummy_th.ack = 0;
2058         newsk->dummy_th.urg = 0;
2059         newsk->dummy_th.res2 = 0;
2060         newsk->acked_seq = skb->h.th->seq + 1;
2061         newsk->copied_seq = skb->h.th->seq;
2062 
2063         /*
2064          *      Grab the ttl and tos values and use them 
2065          */
2066 
2067         newsk->ip_ttl=sk->ip_ttl;
2068         newsk->ip_tos=skb->ip_hdr->tos;
2069 
2070         /*
2071          *      Use 512 or whatever user asked for 
2072          */
2073 
2074         /*
2075          *      Note use of sk->user_mss, since user has no direct access to newsk 
2076          */
2077 
2078         rt=ip_rt_route(saddr, NULL,NULL);
2079         
2080         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
2081                 newsk->window_clamp = rt->rt_window;
2082         else
2083                 newsk->window_clamp = 0;
2084                 
2085         if (sk->user_mss)
2086                 newsk->mtu = sk->user_mss;
2087         else if(rt!=NULL && (rt->rt_flags&RTF_MSS))
2088                 newsk->mtu = rt->rt_mss - HEADER_SIZE;
2089         else 
2090         {
2091 #ifdef CONFIG_INET_SNARL        /* Sub Nets Are Local */
2092                 if ((saddr ^ daddr) & default_mask(saddr))
2093 #else
2094                 if ((saddr ^ daddr) & dev->pa_mask)
2095 #endif
2096                         newsk->mtu = 576 - HEADER_SIZE;
2097                 else
2098                         newsk->mtu = MAX_WINDOW;
2099         }
2100 
2101         /*
2102          *      But not bigger than device MTU 
2103          */
2104 
2105         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2106 
2107         /*
2108          *      This will min with what arrived in the packet 
2109          */
2110 
2111         tcp_options(newsk,skb->h.th);
2112 
2113         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2114         if (buff == NULL) 
2115         {
2116                 sk->err = -ENOMEM;
2117                 newsk->dead = 1;
2118                 release_sock(newsk);
2119                 kfree_skb(skb, FREE_READ);
2120                 tcp_statistics.TcpAttemptFails++;
2121                 return;
2122         }
2123   
2124         buff->len = sizeof(struct tcphdr)+4;
2125         buff->sk = newsk;
2126         buff->localroute = newsk->localroute;
2127 
2128         t1 =(struct tcphdr *) buff->data;
2129 
2130         /*
2131          *      Put in the IP header and routing stuff. 
2132          */
2133 
2134         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2135                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2136 
2137         /*
2138          *      Something went wrong. 
2139          */
2140 
2141         if (tmp < 0) 
2142         {
2143                 sk->err = tmp;
2144                 buff->free=1;
2145                 kfree_skb(buff,FREE_WRITE);
2146                 newsk->dead = 1;
2147                 release_sock(newsk);
2148                 skb->sk = sk;
2149                 kfree_skb(skb, FREE_READ);
2150                 tcp_statistics.TcpAttemptFails++;
2151                 return;
2152         }
2153 
2154         buff->len += tmp;
2155         t1 =(struct tcphdr *)((char *)t1 +tmp);
2156   
2157         memcpy(t1, skb->h.th, sizeof(*t1));
2158         buff->h.seq = newsk->write_seq;
2159         /*
2160          *      Swap the send and the receive. 
2161          */
2162         t1->dest = skb->h.th->source;
2163         t1->source = newsk->dummy_th.source;
2164         t1->seq = ntohl(newsk->write_seq++);
2165         t1->ack = 1;
2166         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2167         newsk->sent_seq = newsk->write_seq;
2168         t1->window = ntohs(newsk->window);
2169         t1->res1 = 0;
2170         t1->res2 = 0;
2171         t1->rst = 0;
2172         t1->urg = 0;
2173         t1->psh = 0;
2174         t1->syn = 1;
2175         t1->ack_seq = ntohl(skb->h.th->seq+1);
2176         t1->doff = sizeof(*t1)/4+1;
2177         ptr =(unsigned char *)(t1+1);
2178         ptr[0] = 2;
2179         ptr[1] = 4;
2180         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2181         ptr[3] =(newsk->mtu) & 0xff;
2182 
2183         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2184         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2185 
2186         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2187         skb->sk = newsk;
2188 
2189         /*
2190          *      Charge the sock_buff to newsk. 
2191          */
2192          
2193         sk->rmem_alloc -= skb->mem_len;
2194         newsk->rmem_alloc += skb->mem_len;
2195         
2196         skb_queue_tail(&sk->receive_queue,skb);
2197         sk->ack_backlog++;
2198         release_sock(newsk);
2199         tcp_statistics.TcpOutSegs++;
2200 }
2201 
2202 
2203 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2204 {
2205         struct sk_buff *buff;
2206         int need_reset = 0;
2207         struct tcphdr *t1, *th;
2208         struct proto *prot;
2209         struct device *dev=NULL;
2210         int tmp;
2211 
2212         /*
2213          * We need to grab some memory, and put together a FIN, 
2214          * and then put it into the queue to be sent.
2215          */
2216         sk->inuse = 1;
2217         sk->keepopen = 1;
2218         sk->shutdown = SHUTDOWN_MASK;
2219 
2220         if (!sk->dead) 
2221                 sk->state_change(sk);
2222 
2223         if (timeout == 0) 
2224         {
2225                 /*
2226                  *  We need to flush the recv. buffs.  We do this only on the
2227                  *  descriptor close, not protocol-sourced closes, because the
2228                  *  reader process may not have drained the data yet!
2229                  */
2230 
2231                 if (skb_peek(&sk->receive_queue) != NULL) 
2232                 {
2233                         struct sk_buff *skb;
2234                         if(sk->debug)
2235                                 printk("Clean rcv queue\n");
2236                         while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2237                         {
2238                                 /* The +1 is not needed because the FIN takes up seq
2239                                    is not read!!! */
2240                                 if(skb->len > 0 && after(skb->h.th->seq + skb->len , sk->copied_seq))
2241                                         need_reset = 1;
2242                                 kfree_skb(skb, FREE_READ);
2243                         }
2244                         if(sk->debug)
2245                                 printk("Cleaned.\n");
2246                 }
2247         }
2248 
2249         /*
2250          *      Get rid off any half-completed packets. 
2251          */
2252          
2253         if (sk->partial) 
2254         {
2255                 tcp_send_partial(sk);
2256         }
2257 
2258         switch(sk->state) 
2259         {
2260                 case TCP_FIN_WAIT1:
2261                 case TCP_FIN_WAIT2:
2262                 case TCP_CLOSING:
2263                         /*
2264                          * These states occur when we have already closed out
2265                          * our end.  If there is no timeout, we do not do
2266                          * anything.  We may still be in the middle of sending
2267                          * the remainder of our buffer, for example...
2268                          * resetting the timer would be inappropriate.
2269                          *
2270                          * XXX if retransmit count reaches limit, is tcp_close()
2271                          * called with timeout == 1 ? if not, we need to fix that.
2272                          */
2273                         if (!timeout) {
2274                                 int timer_active;
2275 
2276                                 timer_active = del_timer(&sk->timer);
2277                                 if (timer_active)
2278                                         add_timer(&sk->timer);
2279                                 else
2280                                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2281                         }
2282 #ifdef NOTDEF
2283                         /* 
2284                          *      Start a timer.
2285                          * original code was 4 * sk->rtt.  In converting to the
2286                          * new rtt representation, we can't quite use that.
2287                          * it seems to make most sense to  use the backed off value
2288                          */
2289                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2290 #endif
2291                         if (timeout) 
2292                                 tcp_time_wait(sk);
2293                         release_sock(sk);
2294                         return; /* break causes a double release - messy */
2295                 case TCP_TIME_WAIT:
2296                 case TCP_LAST_ACK:
2297                         /*
2298                          * A timeout from these states terminates the TCB.
2299                          */
2300                         if (timeout) 
2301                         {
2302                                 tcp_set_state(sk,TCP_CLOSE);
2303                         }
2304                         release_sock(sk);
2305                         return;
2306                 case TCP_LISTEN:
2307                         tcp_set_state(sk,TCP_CLOSE);
2308                         release_sock(sk);
2309                         return;
2310                 case TCP_CLOSE:
2311                         release_sock(sk);
2312                         return;
2313                 case TCP_CLOSE_WAIT:
2314                 case TCP_ESTABLISHED:
2315                 case TCP_SYN_SENT:
2316                 case TCP_SYN_RECV:
2317                         prot =(struct proto *)sk->prot;
2318                         th =(struct tcphdr *)&sk->dummy_th;
2319                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2320                         if (buff == NULL) 
2321                         {
2322                                 /* This will force it to try again later. */
2323                                 /* Or it would have if someone released the socket
2324                                    first. Anyway it might work now */
2325                                 release_sock(sk);
2326                                 if (sk->state != TCP_CLOSE_WAIT)
2327                                         tcp_set_state(sk,TCP_ESTABLISHED);
2328                                 reset_timer(sk, TIME_CLOSE, 100);
2329                                 return;
2330                         }
2331                         buff->sk = sk;
2332                         buff->free = 1;
2333                         buff->len = sizeof(*t1);
2334                         buff->localroute = sk->localroute;
2335                         t1 =(struct tcphdr *) buff->data;
2336         
2337                         /*
2338                          *      Put in the IP header and routing stuff. 
2339                          */
2340                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2341                                          IPPROTO_TCP, sk->opt,
2342                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2343                         if (tmp < 0) 
2344                         {
2345                                 sk->write_seq++;        /* Very important 8) */
2346                                 kfree_skb(buff,FREE_WRITE);
2347 
2348                                 /*
2349                                  * Enter FIN_WAIT1 to await completion of
2350                                  * written out data and ACK to our FIN.
2351                                  */
2352 
2353                                 if(sk->state==TCP_ESTABLISHED)
2354                                         tcp_set_state(sk,TCP_FIN_WAIT1);
2355                                 else
2356                                         tcp_set_state(sk,TCP_FIN_WAIT2);
2357                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2358                                 if(timeout)
2359                                         tcp_time_wait(sk);
2360 
2361                                 release_sock(sk);
2362                                 return;
2363                         }
2364 
2365                         t1 =(struct tcphdr *)((char *)t1 +tmp);
2366                         buff->len += tmp;
2367                         buff->dev = dev;
2368                         memcpy(t1, th, sizeof(*t1));
2369                         t1->seq = ntohl(sk->write_seq);
2370                         sk->write_seq++;
2371                         buff->h.seq = sk->write_seq;
2372                         t1->ack = 1;
2373         
2374                         /* 
2375                          *      Ack everything immediately from now on. 
2376                          */
2377 
2378                         sk->delay_acks = 0;
2379                         t1->ack_seq = ntohl(sk->acked_seq);
2380                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2381                         t1->fin = 1;
2382                         t1->rst = need_reset;
2383                         t1->doff = sizeof(*t1)/4;
2384                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2385 
2386                         tcp_statistics.TcpOutSegs++;
2387         
2388                         if (skb_peek(&sk->write_queue) == NULL) 
2389                         {
2390                                 sk->sent_seq = sk->write_seq;
2391                                 prot->queue_xmit(sk, dev, buff, 0);
2392                         } 
2393                         else 
2394                         {
2395                                 reset_timer(sk, TIME_WRITE, sk->rto);
2396                                 if (buff->next != NULL) 
2397                                 {
2398                                         printk("tcp_close: next != NULL\n");
2399                                         skb_unlink(buff);
2400                                 }
2401                                 skb_queue_tail(&sk->write_queue, buff);
2402                         }
2403 
2404                         /*
2405                          * If established (normal close), enter FIN_WAIT1.
2406                          * If in CLOSE_WAIT, enter LAST_ACK
2407                          * If in CLOSING, remain in CLOSING
2408                          * otherwise enter FIN_WAIT2
2409                          */
2410 
2411                         if (sk->state == TCP_ESTABLISHED)
2412                                 tcp_set_state(sk,TCP_FIN_WAIT1);
2413                         else if (sk->state == TCP_CLOSE_WAIT)
2414                                 tcp_set_state(sk,TCP_LAST_ACK);
2415                         else if (sk->state != TCP_CLOSING)
2416                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2417         }
2418         release_sock(sk);
2419 }
2420 
2421 
2422 /*
2423  * This routine takes stuff off of the write queue,
2424  * and puts it in the xmit queue.
2425  */
2426 static void
2427 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2428 {
2429         struct sk_buff *skb;
2430 
2431         /*
2432          *      The bytes will have to remain here. In time closedown will
2433          *      empty the write queue and all will be happy 
2434          */
2435 
2436         if(sk->zapped)
2437                 return;
2438 
2439         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2440                 before(skb->h.seq, sk->window_seq + 1) &&
2441                 (sk->retransmits == 0 ||
2442                  sk->timeout != TIME_WRITE ||
2443                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2444                 && sk->packets_out < sk->cong_window) 
2445         {
2446                 IS_SKB(skb);
2447                 skb_unlink(skb);
2448                 /* See if we really need to send the packet. */
2449                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2450                 {
2451                         sk->retransmits = 0;
2452                         kfree_skb(skb, FREE_WRITE);
2453                         if (!sk->dead) 
2454                                 sk->write_space(sk);
2455                 } 
2456                 else
2457                 {
2458                         struct tcphdr *th;
2459                         struct iphdr *iph;
2460                         int size;
2461 /*
2462  * put in the ack seq and window at this point rather than earlier,
2463  * in order to keep them monotonic.  We really want to avoid taking
2464  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2465  * Ack and window will in general have changed since this packet was put
2466  * on the write queue.
2467  */
2468                         iph = (struct iphdr *)(skb->data +
2469                                                skb->dev->hard_header_len);
2470                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2471                         size = skb->len - (((unsigned char *) th) - skb->data);
2472                         
2473                         th->ack_seq = ntohl(sk->acked_seq);
2474                         th->window = ntohs(tcp_select_window(sk));
2475 
2476                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2477 
2478                         sk->sent_seq = skb->h.seq;
2479                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2480                 }
2481         }
2482 }
2483 
2484 
2485 /*
2486  *      This routine sorts the send list, and resets the
2487  *      sk->send_head and sk->send_tail pointers.
2488  */
2489 
2490 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2491 {
2492         struct sk_buff *list = NULL;
2493         struct sk_buff *skb,*skb2,*skb3;
2494 
2495         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2496         {
2497                 skb2 = skb->link3;
2498                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2499                 {
2500                         skb->link3 = list;
2501                         sk->send_tail = skb;
2502                         list = skb;
2503                 }
2504                 else
2505                 {
2506                         for (skb3 = list; ; skb3 = skb3->link3) 
2507                         {
2508                                 if (skb3->link3 == NULL ||
2509                                     before(skb->h.seq, skb3->link3->h.seq))
2510                                 {
2511                                         skb->link3 = skb3->link3;
2512                                         skb3->link3 = skb;
2513                                         if (skb->link3 == NULL) 
2514                                                 sk->send_tail = skb;
2515                                         break;
2516                                 }
2517                         }
2518                 }
2519         }
2520         sk->send_head = list;
2521 }
2522   
2523 
2524 /*
2525  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      sk    - socket the segment was received for
      *      th    - TCP header of the received segment; ack_seq, window and
      *              doff are read from it
      *      saddr - not referenced anywhere in this routine
      *      len   - compared against th->doff*4 to tell whether the segment
      *              carries anything beyond its TCP header
      *
      *      In order, the routine: records the peer's window, pulls segments
      *      back onto the write queue if the window shrank, grows the
      *      congestion window, frees acknowledged segments from the
      *      retransmit list (updating the rtt/rto estimate), restarts
      *      transmission or the appropriate timer, advances the close-down
      *      states (LAST_ACK, FIN_WAIT1, CLOSING) and finally decides
      *      whether to retransmit.
      *
      *      Returns 0 when the ack is rejected: it acknowledges something
      *      beyond sk->sent_seq, or it is older than sk->rcv_ack_seq while
      *      the socket is in neither ESTABLISHED nor CLOSE_WAIT.  Returns 1
      *      in every other case, including a zapped socket.
2526  */
2527 
2528 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2529 {
2530         unsigned long ack;
2531         int flag = 0;
2532 
2533         /* 
2534          * 1 - there was data in packet as well as ack or new data is sent or 
2535          *     in shutdown state
2536          * 2 - data from retransmit queue was acked and removed
2537          * 4 - window shrunk or data from retransmit queue was acked and removed
2538          */
2539 
2540         if(sk->zapped)
2541                 return(1);      /* Dead, can't ack any more so why bother */
2542 
2543         ack = ntohl(th->ack_seq);
             /* Remember the largest window the peer has advertised and re-derive mss from it. */
2544         if (ntohs(th->window) > sk->max_window) 
2545         {
2546                 sk->max_window = ntohs(th->window);
2547 #ifdef CONFIG_INET_PCTCP
2548                 sk->mss = min(sk->max_window>>1, sk->mtu);
2549 #else
2550                 sk->mss = min(sk->max_window, sk->mtu);
2551 #endif  
2552         }
2553 
             /* An ack seen while the keepalive timer is the active one clears the retransmit count. */
2554         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2555                 sk->retransmits = 0;
2556 
             /* Ack lies outside rcv_ack_seq..sent_seq: it acknowledges nothing new. */
2557         if (after(ack, sk->sent_seq) || before(ack, sk->rcv_ack_seq)) 
2558         {
2559                 if(sk->debug)
2560                         printk("Ack ignored %lu %lu\n",ack,sk->sent_seq);
2561                         
2562                 /*
2563                  *      Keepalive processing.
2564                  */
2565                  
2566                 if (after(ack, sk->sent_seq) || (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2567                 {
2568                         return(0);
2569                 }
                     /* Old ack on a live connection: just restart a running keepalive timer. */
2570                 if (sk->keepopen) 
2571                 {
2572                         if(sk->timeout==TIME_KEEPOPEN)
2573                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2574                 }
2575                 return(1);
2576         }
2577 
             /* Segment is longer than its TCP header, so it carries more than the ack. */
2578         if (len != th->doff*4) 
2579                 flag |= 1;
2580 
2581         /* See if our window has been shrunk. */
2582 
2583         if (after(sk->window_seq, ack+ntohs(th->window))) 
2584         {
2585                 /*
2586                  * We may need to move packets from the send queue
2587                  * to the write queue, if the window has been shrunk on us.
2588                  * The RFC says you are not allowed to shrink your window
2589                  * like this, but if the other end does, you must be able
2590                  * to deal with it.
2591                  */
2592                 struct sk_buff *skb;
2593                 struct sk_buff *skb2;
2594                 struct sk_buff *wskb = NULL;
2595         
                     /* Detach the whole retransmit list; it is rebuilt in the loop below. */
2596                 skb2 = sk->send_head;
2597                 sk->send_head = NULL;
2598                 sk->send_tail = NULL;
2599         
2600                 flag |= 4;
2601         
2602                 sk->window_seq = ack + ntohs(th->window);
2603                 cli();
2604                 while (skb2 != NULL) 
2605                 {
2606                         skb = skb2;
2607                         skb2 = skb->link3;
2608                         skb->link3 = NULL;
                             /* Segment now lies beyond the window edge: hand it back to the write queue. */
2609                         if (after(skb->h.seq, sk->window_seq)) 
2610                         {
2611                                 if (sk->packets_out > 0) 
2612                                         sk->packets_out--;
2613                                 /* We may need to remove this from the dev send list. */
2614                                 if (skb->next != NULL) 
2615                                 {
2616                                         skb_unlink(skb);                                
2617                                 }
2618                                 /* Now add it to the write_queue. */
                                     /* wskb is the previously moved segment, used so that the moved
                                        segments keep their relative order (assuming skb_append inserts
                                        after its first argument - confirm against skbuff code). */
2619                                 if (wskb == NULL)
2620                                         skb_queue_head(&sk->write_queue,skb);
2621                                 else
2622                                         skb_append(wskb,skb);
2623                                 wskb = skb;
2624                         } 
2625                         else 
2626                         {
                                     /* Still inside the window: re-append to the retransmit list. */
2627                                 if (sk->send_head == NULL) 
2628                                 {
2629                                         sk->send_head = skb;
2630                                         sk->send_tail = skb;
2631                                 }
2632                                 else
2633                                 {
2634                                         sk->send_tail->link3 = skb;
2635                                         sk->send_tail = skb;
2636                                 }
2637                                 skb->link3 = NULL;
2638                         }
2639                 }
2640                 sti();
2641         }
2642 
             /* Keep head and tail consistent: if either is NULL the retransmit list is treated as empty. */
2643         if (sk->send_tail == NULL || sk->send_head == NULL) 
2644         {
2645                 sk->send_head = NULL;
2646                 sk->send_tail = NULL;
2647                 sk->packets_out= 0;
2648         }
2649 
             /* Window edge as now advertised by the peer: ack plus the offered window. */
2650         sk->window_seq = ack + ntohs(th->window);
2651 
2652         /* We don't want too many packets out there. */
             /* Growth only happens under the write timer, for an ack that advances
                rcv_ack_seq, and stops once cong_window reaches 2048. */
2653         if (sk->timeout == TIME_WRITE && 
2654                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2655         {
2656 /* 
2657  * This is Jacobson's slow start and congestion avoidance. 
2658  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2659  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2660  * counter and increment it once every cwnd times.  It's possible
2661  * that this should be done only if sk->retransmits == 0.  I'm
2662  * interpreting "new data is acked" as including data that has
2663  * been retransmitted but is just now being acked.
2664  */
2665                 if (sk->cong_window < sk->ssthresh)  
2666                   /* 
2667                    *    In "safe" area, increase
2668                    */
2669                         sk->cong_window++;
2670                 else 
2671                 {
2672                   /*
2673                    *    In dangerous area, increase slowly.  In theory this is
2674                    *    sk->cong_window += 1 / sk->cong_window
2675                    */
2676                         if (sk->cong_count >= sk->cong_window) 
2677                         {
2678                                 sk->cong_window++;
2679                                 sk->cong_count = 0;
2680                         }
2681                         else 
2682                                 sk->cong_count++;
2683                 }
2684         }
2685 
             /* Record the ack; it cannot be older than the previous one (checked above). */
2686         sk->rcv_ack_seq = ack;
2687 
2688         /*
2689          * if this ack opens up a zero window, clear backoff.  It was
2690          * being used to time the probes, and is probably far higher than
2691          * it needs to be for normal retransmission.
2692          */
2693 
2694         if (sk->timeout == TIME_PROBE0) 
2695         {
2696                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2697                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2698                 {
2699                         sk->retransmits = 0;
2700                         sk->backoff = 0;
2701                   /*
2702                    *    Recompute rto from rtt.  this eliminates any backoff.
2703                    */
2704 
2705                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2706                         if (sk->rto > 120*HZ)
2707                                 sk->rto = 120*HZ;
2708                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2709                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2710                                                    .2 of a second is going to need huge windows (SIGH) */
2711                                 sk->rto = 20;
2712                 }
2713         }
2714 
2715   /* 
2716    *    See if we can take anything off of the retransmit queue.
2717    */
2718    
2719         while(sk->send_head != NULL) 
2720         {
2721                 /* Check for a bug. */
2722                 if (sk->send_head->link3 &&
2723                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2724                 {
2725                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2726                         sort_send(sk);
2727                 }
2728 
                     /* Head segment is fully covered by this ack: account for it and free it. */
2729                 if (before(sk->send_head->h.seq, ack+1)) 
2730                 {
2731                         struct sk_buff *oskb;   
2732                         if (sk->retransmits) 
2733                         {       
2734                                 /*
2735                                  *      We were retransmitting.  don't count this in RTT est 
2736                                  */
2737                                 flag |= 2;
2738 
2739                                 /*
2740                                  * even though we've gotten an ack, we're still
2741                                  * retransmitting as long as we're sending from
2742                                  * the retransmit queue.  Keeping retransmits non-zero
2743                                  * prevents us from getting new data interspersed with
2744                                  * retransmissions.
2745                                  */
2746 
2747                                 if (sk->send_head->link3)
2748                                         sk->retransmits = 1;
2749                                 else
2750                                         sk->retransmits = 0;
2751                         }
2752                         /*
2753                          * Note that we only reset backoff and rto in the
2754                          * rtt recomputation code.  And that doesn't happen
2755                          * if there were retransmissions in effect.  So the
2756                          * first new packet after the retransmissions is
2757                          * sent with the backoff still in effect.  Not until
2758                          * we get an ack from a non-retransmitted packet do
2759                          * we reset the backoff and rto.  This allows us to deal
2760                          * with a situation where the network delay has increased
2761                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2762                          */
2763 
2764                         /*
2765                          *      We have one less packet out there. 
2766                          */
2767                          
2768                         if (sk->packets_out > 0) 
2769                                 sk->packets_out --;
2770                         /* 
2771                          *      Wake up the process, it can probably write more. 
2772                          */
2773                         if (!sk->dead) 
2774                                 sk->write_space(sk);
2775                         oskb = sk->send_head;
2776 
                             /* Bit 2 is set here only if a retransmission was (or earlier in
                                this loop, a removal had been) in effect; otherwise take an
                                rtt sample from this segment. */
2777                         if (!(flag&2)) 
2778                         {
2779                                 long m;
2780         
2781                                 /*
2782                                  *      The following amusing code comes from Jacobson's
2783                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2784                                  *      are scaled versions of rtt and mean deviation.
2785                                  *      This is designed to be as fast as possible 
2786                                  *      m stands for "measurement".
2787                                  */
2788         
2789                                 m = jiffies - oskb->when;  /* RTT */
2790                                 if(m<=0)
2791                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2792                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2793                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2794                                 if (m < 0)
2795                                         m = -m;         /* m is now abs(error) */
2796                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2797                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2798         
2799                                 /*
2800                                  *      Now update timeout.  Note that this removes any backoff.
2801                                  */
2802                          
2803                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2804                                 if (sk->rto > 120*HZ)
2805                                         sk->rto = 120*HZ;
2806                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2807                                         sk->rto = 20;
2808                                 sk->backoff = 0;
2809                         }
                             /* A segment is leaving the retransmit queue (flag meanings 2 and 4 above). */
2810                         flag |= (2|4);
                             /* The head is re-read and unlinked between cli() and sti(). */
2811                         cli();
2812                         oskb = sk->send_head;
2813                         IS_SKB(oskb);
2814                         sk->send_head = oskb->link3;
2815                         if (sk->send_head == NULL) 
2816                         {
2817                                 sk->send_tail = NULL;
2818                         }
2819 
2820                 /*
2821                  *      We may need to remove this from the dev send list. 
2822                  */
2823 
2824                         if (oskb->next)
2825                                 skb_unlink(oskb);
2826                         sti();
2827                         kfree_skb(oskb, FREE_WRITE); /* write. */
2828                         if (!sk->dead) 
2829                                 sk->write_space(sk);
2830                 }
2831                 else
2832                 {
                             /* First segment not covered by the ack: nothing further to free. */
2833                         break;
2834                 }
2835         }
2836 
2837         /*
2838          * XXX someone ought to look at this too.. at the moment, if skb_peek()
2839          * returns non-NULL, we completely ignore the timer stuff in the else
2840          * clause.  We ought to organize the code so that else clause can
2841          * (should) be executed regardless, possibly moving the PROBE timer
2842          * reset over.  The skb_peek() thing should only move stuff to the
2843          * write queue, NOT also manage the timer functions.
2844          */
2845 
2846         /*
2847          * Maybe we can take some stuff off of the write queue,
2848          * and put it onto the xmit queue.
2849          */
2850         if (skb_peek(&sk->write_queue) != NULL) 
2851         {
                     /* Same admission test that tcp_write_xmit() applies to the head of the queue. */
2852                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2853                         (sk->retransmits == 0 || 
2854                          sk->timeout != TIME_WRITE ||
2855                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2856                         && sk->packets_out < sk->cong_window) 
2857                 {
2858                         flag |= 1;
2859                         tcp_write_xmit(sk);
2860                 }
                     /* Head of the write queue is beyond the window and nothing is
                        outstanding: start the zero-window probe timer. */
2861                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2862                         sk->send_head == NULL &&
2863                         sk->ack_backlog == 0 &&
2864                         sk->state != TCP_TIME_WAIT) 
2865                 {
2866                         reset_timer(sk, TIME_PROBE0, sk->rto);
2867                 }               
2868         }
2869         else
2870         {
2871                 /*
2872                  * from TIME_WAIT we stay in TIME_WAIT as long as we rx packets
2873                  * from TCP_CLOSE we don't do anything
2874                  *
2875                  * from anything else, if there is write data (or fin) pending,
2876                  * we use a TIME_WRITE timeout, else if keepalive we reset to
2877                  * a KEEPALIVE timeout, else we delete the timer.
2878                  *
2879                  * We do not set flag for nominal write data, otherwise we may
2880                  * force a state where we start to write itsy bitsy tidbits
2881                  * of data.
2882                  */
2883 
2884                 switch(sk->state) {
2885                 case TCP_TIME_WAIT:
2886                         /*
2887                          * keep us in TIME_WAIT until we stop getting packets,
2888                          * reset the timeout.
2889                          */
2890                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2891                         break;
2892                 case TCP_CLOSE:
2893                         /*
2894                          * don't touch the timer.
2895                          */
2896                         break;
2897                 default:
2898                         /*
2899                          * must check send_head, write_queue, and ack_backlog
2900                          * to determine which timeout to use.
2901                          */
2902                         if (sk->send_head || skb_peek(&sk->write_queue) != NULL || sk->ack_backlog) {
2903                                 reset_timer(sk, TIME_WRITE, sk->rto);
2904                         } else if (sk->keepopen) {
2905                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2906                         } else {
2907                                 delete_timer(sk);
2908                         }
2909                         break;
2910                 }
                     /* Alternative timer handling below is compiled only when NOTDEF is defined. */
2911 #ifdef NOTDEF
2912                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2913                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2914                 {
2915                         if (!sk->dead)
2916                                 sk->write_space(sk);
2917                         if (sk->keepopen) {
2918                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2919                         } else {
2920                                 delete_timer(sk);
2921                         }
2922                 }
2923                 else
2924                 {
2925                         if (sk->state != (unsigned char) sk->keepopen) 
2926                         {
2927                                 reset_timer(sk, TIME_WRITE, sk->rto);
2928                         }
2929                         if (sk->state == TCP_TIME_WAIT) 
2930                         {
2931                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2932                         }       
2933                 }
2934 #endif
2935         }
2936 
             /* Nothing in flight and nothing queued: push out the pending partial segment. */
2937         if (sk->packets_out == 0 && sk->partial != NULL &&
2938                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2939         {
2940                 flag |= 1;
2941                 tcp_send_partial(sk);
2942         }
2943 
2944         /*
2945          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2946          * we are now waiting for an acknowledge to our FIN.  The other end is
2947          * already in TIME_WAIT.
2948          *
2949          * Move to TCP_CLOSE on success.
2950          */
2951 
2952         if (sk->state == TCP_LAST_ACK) 
2953         {
2954                 if (!sk->dead)
2955                         sk->state_change(sk);
                     /* Everything we wrote (FIN included) is acked and the peer's FIN has been acked by us. */
2956                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2957                 {
2958                         flag |= 1;
2959                         tcp_set_state(sk,TCP_CLOSE);
2960                         sk->shutdown = SHUTDOWN_MASK;
2961                 }
2962         }
2963 
2964         /*
2965          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2966          *
2967          * Move to FIN_WAIT2 to await a FIN from the other end. Set
2968          * SEND_SHUTDOWN but not RCV_SHUTDOWN as data can still be coming in.
2969          */
2970 
2971         if (sk->state == TCP_FIN_WAIT1) 
2972         {
2973 
2974                 if (!sk->dead) 
2975                         sk->state_change(sk);
2976                 if (sk->rcv_ack_seq == sk->write_seq) 
2977                 {
2978                         flag |= 1;
                             /* The TIME_WAIT shortcut below is compiled out unless THIS_BIT_IS_WRONG is defined. */
2979 #ifdef THIS_BIT_IS_WRONG                        
2980                         if (sk->acked_seq != sk->fin_seq) 
2981                         {
2982                                 tcp_time_wait(sk);
2983                         }
2984                         else
2985 #endif                  
2986                         {
2987                                 sk->shutdown |= SEND_SHUTDOWN;
2988                                 tcp_set_state(sk,TCP_FIN_WAIT2);
2989                         }
2990                 }
2991         }
2992 
2993         /*
2994          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2995          *
2996          *      Move to TIME_WAIT
2997          */
2998 
2999         if (sk->state == TCP_CLOSING) 
3000         {
3001 
3002                 if (!sk->dead) 
3003                         sk->state_change(sk);
3004                 if (sk->rcv_ack_seq == sk->write_seq) 
3005                 {
3006                         flag |= 1;
3007                         tcp_time_wait(sk);
3008                 }
3009         }
3010 
3011         /*
3012          * I make no guarantees about the first clause in the following
3013          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
3014          * what conditions "!flag" would be true.  However I think the rest
3015          * of the conditions would prevent that from causing any
3016          * unnecessary retransmission. 
3017          *   Clearly if the first packet has expired it should be 
3018          * retransmitted.  The other alternative, "flag&2 && retransmits", is
3019          * harder to explain:  You have to look carefully at how and when the
3020          * timer is set and with what timeout.  The most recent transmission always
3021          * sets the timer.  So in general if the most recent thing has timed
3022          * out, everything before it has as well.  So we want to go ahead and
3023          * retransmit some more.  If we didn't explicitly test for this
3024          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
3025          * would not be true.  If you look at the pattern of timing, you can
3026          * show that rto is increased fast enough that the next packet would
3027          * almost never be retransmitted immediately.  Then you'd end up
3028          * waiting for a timeout to send each packet on the retransmission
3029          * queue.  With my implementation of the Karn sampling algorithm,
3030          * the timeout would double each time.  The net result is that it would
3031          * take a hideous amount of time to recover from a single dropped packet.
3032          * It's possible that there should also be a test for TIME_WRITE, but
3033          * I think as long as "send_head != NULL" and "retransmit" is on, we've
3034          * got to be in real retransmission mode.
3035          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
3036          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
3037          * As long as no further losses occur, this seems reasonable.
3038          */
3039         
3040         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
3041                (((flag&2) && sk->retransmits) ||
3042                (sk->send_head->when + sk->rto < jiffies))) 
3043         {
3044                 ip_do_retransmit(sk, 1);
3045                 reset_timer(sk, TIME_WRITE, sk->rto);
3046         }
3047 
3048         return(1);
3049 }
3050 
3051 
3052 /*
3053  *      This routine handles the data.  If there is room in the buffer,
3054  *      it will have already been moved into it.  If there is no
3055  *      room, then we will just have to discard the packet.
3056  */
3057 
/*
 *      tcp_data: queue an incoming segment on sk->receive_queue and update
 *      the receive-side acknowledgement state.
 *
 *      skb   - the received buffer; its TCP header is read via skb->h.th.
 *      sk    - the socket the segment belongs to.
 *      saddr - passed unchanged to tcp_send_ack().
 *      len   - segment length including the TCP header (the header size,
 *              th->doff*4, is subtracted to obtain skb->len).
 *
 *      Every path through this function returns 0.
 */
3058 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
3059          unsigned long saddr, unsigned short len)
3060 {
3061         struct sk_buff *skb1, *skb2;
3062         struct tcphdr *th;
3063         int dup_dumped=0;
3064         unsigned long new_seq;
3065 
3066         th = skb->h.th;
             /* From here on skb->len is the payload length only. */
3067         skb->len = len -(th->doff*4);
3068 
3069         /* The bytes in the receive read/assembly queue has increased. Needed for the
3070            low memory discard algorithm */
3071            
3072         sk->bytes_rcv += skb->len;
3073         
3074         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
3075         {
3076                 /* 
3077                  *      Don't want to keep passing ack's back and forth. 
3078                  *      (someone sent us dataless, boring frame)
3079                  */
3080                 if (!th->ack)
3081                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
3082                 kfree_skb(skb, FREE_READ);
3083                 return(0);
3084         }
3085         
3086         /*
3087          *      We no longer have anyone receiving data on this connection.
3088          */
3089 
3090         if(sk->shutdown & RCV_SHUTDOWN)
3091         {
3092                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
3093                 
3094                 if(after(new_seq,sk->/*copied*/acked_seq+1))    /* If the right edge of this frame is after the last copied byte
3095                                                            then it contains data we will never touch. We send an RST to 
3096                                                            ensure the far end knows it never got to the application */
3097                 {
3098                         sk->acked_seq = new_seq + th->fin;
3099                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
3100                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
3101                         tcp_statistics.TcpEstabResets++;
3102                         tcp_set_state(sk,TCP_CLOSE);
3103                         sk->err = EPIPE;
3104                         sk->shutdown = SHUTDOWN_MASK;
3105                         kfree_skb(skb, FREE_READ);
3106                         if (!sk->dead)
3107                                 sk->state_change(sk);
3108                         return(0);
3109                 }
3110 #if 0           
3111                 /* Discard the frame here - we've already proved its a duplicate */
3112                 
3113                 kfree_skb(skb, FREE_READ);
3114                 return(0);                              
3115 #endif          
3116         }
3117         /*
3118          *      Now we have to walk the chain, and figure out where this one
3119          *      goes into it.  This is set up so that the last packet we received
3120          *      will be the first one we look at, that way if everything comes
3121          *      in order, there will be no performance loss, and if they come
3122          *      out of order we will be able to fit things in nicely.
3123          */
3124 
3125         /* 
3126          *      This should start at the last one, and then go around forwards.
3127          */
3128 
             /*
              * After this if/else, skb1 is NULL when the queue was empty or a
              * duplicate was replaced; otherwise it is the queue entry at which
              * the walk stopped.
              */
3129         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
3130         {
3131                 skb_queue_head(&sk->receive_queue,skb);
3132                 skb1= NULL;
3133         } 
3134         else
3135         {
3136                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
3137                 {
3138                         if(sk->debug)
3139                         {
3140                                 printk("skb1=%p :", skb1);
3141                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
3142                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
3143                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
3144                                                 sk->acked_seq);
3145                         }
3146                         
3147                         /*
3148                          *      Optimisation: Duplicate frame or extension of previous frame from
3149                          *      same sequence point (lost ack case).
3150                          *      The frame contains duplicate data or replaces a previous frame
3151                          *      discard the previous frame (safe as sk->inuse is set) and put
3152                          *      the new one in its place.
3153                          */
3154                          
3155                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3156                         {
3157                                 skb_append(skb1,skb);
3158                                 skb_unlink(skb1);
3159                                 kfree_skb(skb1,FREE_READ);
3160                                 dup_dumped=1;
3161                                 skb1=NULL;
3162                                 break;
3163                         }
3164                         
3165                         /*
3166                          *      Found where it fits
3167                          */
3168                          
3169                         if (after(th->seq+1, skb1->h.th->seq))
3170                         {
3171                                 skb_append(skb1,skb);
3172                                 break;
3173                         }
3174                         
3175                         /*
3176                          *      See if we've hit the start. If so insert.
3177                          */
3178                         if (skb1 == skb_peek(&sk->receive_queue))
3179                         {
3180                                 skb_queue_head(&sk->receive_queue, skb);
3181                                 break;
3182                         }
3183                 }
3184         }
3185 
3186         /*
3187          *      Figure out what the ack value for this frame is
3188          */
3189          
             /* The header's ack_seq field is overwritten with the sequence number just past this frame. */
3190         th->ack_seq = th->seq + skb->len;
3191         if (th->syn) 
3192                 th->ack_seq++;
3193         if (th->fin)
3194                 th->ack_seq++;
3195 
3196         if (before(sk->acked_seq, sk->copied_seq)) 
3197         {
3198                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3199                 sk->acked_seq = sk->copied_seq;
3200         }
3201 
3202         /*
3203          *      Now figure out if we can ack anything.
3204          */
3205 
3206         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3207         {
3208                 if (before(th->seq, sk->acked_seq+1)) 
3209                 {
3210                         int newwindow;
3211 
                             /* Advance acked_seq over this frame and shrink the window by the same amount (not below zero). */
3212                         if (after(th->ack_seq, sk->acked_seq)) 
3213                         {
3214                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3215                                 if (newwindow < 0)
3216                                         newwindow = 0;  
3217                                 sk->window = newwindow;
3218                                 sk->acked_seq = th->ack_seq;
3219                         }
3220                         skb->acked = 1;
3221 
3222                         /* 
3223                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3224                          */
3225 
3226                         if (skb->h.th->fin) 
3227                         {
3228                                 if (!sk->dead) 
3229                                         sk->state_change(sk);
3230                                 sk->shutdown |= RCV_SHUTDOWN;
3231                         }
3232           
                             /*
                              * Walk the frames queued after this one and mark each as acked
                              * while it starts at or before acked_seq; stop at the first
                              * frame that does not.
                              */
3233                         for(skb2 = skb->next;
3234                             skb2 != (struct sk_buff *)&sk->receive_queue;
3235                             skb2 = skb2->next) 
3236                         {
3237                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3238                                 {
3239                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3240                                         {
3241                                                 newwindow = sk->window -
3242                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3243                                                 if (newwindow < 0)
3244                                                         newwindow = 0;  
3245                                                 sk->window = newwindow;
3246                                                 sk->acked_seq = skb2->h.th->ack_seq;
3247                                         }
3248                                         skb2->acked = 1;
3249                                         /*
3250                                          *      When we ack the fin, we turn on
3251                                          *      the RCV_SHUTDOWN flag.
3252                                          */
3253                                         if (skb2->h.th->fin) 
3254                                         {
3255                                                 sk->shutdown |= RCV_SHUTDOWN;
3256                                                 if (!sk->dead)
3257                                                         sk->state_change(sk);
3258                                         }
3259 
3260                                         /*
3261                                          *      Force an immediate ack.
3262                                          */
3263                                          
3264                                         sk->ack_backlog = sk->max_ack_backlog;
3265                                 }
3266                                 else
3267                                 {
3268                                         break;
3269                                 }
3270                         }
3271 
3272                         /*
3273                          *      This also takes care of updating the window.
3274                          *      This if statement needs to be simplified.
3275                          */
                             /*
                              * The "ack now" branch below is empty (its tcp_send_ack call is
                              * commented out); only the else branch, which queues a delayed
                              * ack and arms the timer, does any work here.
                              */
3276                         if (!sk->delay_acks ||
3277                             sk->ack_backlog >= sk->max_ack_backlog || 
3278                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3279         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3280                         }
3281                         else 
3282                         {
3283                                 sk->ack_backlog++;
3284                                 if(sk->debug)
3285                                         printk("Ack queued.\n");
3286                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3287                         }
3288                 }
3289         }
3290 
3291         /*
3292          *      If we've missed a packet, send an ack.
3293          *      Also start a timer to send another.
3294          */
3295          
3296         if (!skb->acked) 
3297         {
3298         
3299         /*
3300          *      This is important.  If we don't have much room left,
3301          *      we need to throw out a few packets so we have a good
3302          *      window.  Note that mtu is used, not mss, because mss is really
3303          *      for the send side.  He could be sending us stuff as large as mtu.
3304          */
3305                  
                     /*
                      * NOTE(review): the loop frees unacked buffers from the head of
                      * the queue.  If the new skb itself is at the head it looks like
                      * it can be passed to kfree_skb() here, while th (which points
                      * into skb) is still handed to tcp_send_ack() below - confirm
                      * whether that is a use-after-free.
                      */
3306                 while (sk->prot->rspace(sk) < sk->mtu) 
3307                 {
3308                         skb1 = skb_peek(&sk->receive_queue);
3309                         if (skb1 == NULL) 
3310                         {
3311                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3312                                 break;
3313                         }
3314 
3315                         /*
3316                          *      Don't throw out something that has been acked. 
3317                          */
3318                  
3319                         if (skb1->acked) 
3320                         {
3321                                 break;
3322                         }
3323                 
3324                         skb_unlink(skb1);
3325                         kfree_skb(skb1, FREE_READ);
3326                 }
3327                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3328                 sk->ack_backlog++;
3329                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3330         }
3331         else
3332         {
3333                 /* This frame was marked acked above (skb->acked is set): send the ack for it now. */
3334                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3335         }
3336 
3337         /*
3338          *      Now tell the user we may have some data. 
3339          */
3340          
3341         if (!sk->dead) 
3342         {
3343                 if(sk->debug)
3344                         printk("Data wakeup.\n");
3345                 sk->data_ready(sk,0);
3346         } 
3347         return(0);
3348 }
3349 
3350 
3351 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3352 {
3353         unsigned long ptr = ntohs(th->urg_ptr);
3354 
3355         if (ptr)
3356                 ptr--;
3357         ptr += th->seq;
3358 
3359         /* ignore urgent data that we've already seen and read */
3360         if (after(sk->copied_seq+1, ptr))
3361                 return;
3362 
3363         /* do we already have a newer (or duplicate) urgent pointer? */
3364         if (sk->urg_data && !after(ptr, sk->urg_seq))
3365                 return;
3366 
3367         /* tell the world about our new urgent pointer */
3368         if (sk->proc != 0) {
3369                 if (sk->proc > 0) {
3370                         kill_proc(sk->proc, SIGURG, 1);
3371                 } else {
3372                         kill_pg(-sk->proc, SIGURG, 1);
3373                 }
3374         }
3375         sk->urg_data = URG_NOTYET;
3376         sk->urg_seq = ptr;
3377 }
3378 
3379 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3380         unsigned long saddr, unsigned long len)
3381 {
3382         unsigned long ptr;
3383 
3384         /* check if we get a new urgent pointer */
3385         if (th->urg)
3386                 tcp_check_urg(sk,th);
3387 
3388         /* do we wait for any urgent data? */
3389         if (sk->urg_data != URG_NOTYET)
3390                 return 0;
3391 
3392         /* is the urgent pointer pointing into this packet? */
3393         ptr = sk->urg_seq - th->seq + th->doff*4;
3394         if (ptr >= len)
3395                 return 0;
3396 
3397         /* ok, got the correct packet, update info */
3398         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3399         if (!sk->dead)
3400                 sk->data_ready(sk,0);
3401         return 0;
3402 }
3403 
3404 
3405 /*
3406  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3407  *
3408  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3409  *  (and thence onto LAST-ACK and finally, CLOSE, we never enter
3410  *  TIME-WAIT)
3411  *
3412  *  If we are in FINWAIT-1, a received FIN indicates simultaneous
3413  *  close and we go into CLOSING (and later onto TIME-WAIT)
3414  *
3415  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3416  *
3417  */
3418 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3419          unsigned long saddr, struct device *dev)
3420 {
3421         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3422 
3423         if (!sk->dead) 
3424         {
3425                 sk->state_change(sk);
3426         }
3427 
3428         switch(sk->state) 
3429         {
3430                 case TCP_SYN_RECV:
3431                 case TCP_SYN_SENT:
3432                 case TCP_ESTABLISHED:
3433                         /*
3434                          * move to CLOSE_WAIT, tcp_data() already handled
3435                          * sending the ack.
3436                          */
3437                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3438                         /*sk->fin_seq = th->seq+1;*/
3439                         tcp_set_state(sk,TCP_CLOSE_WAIT);
3440                         if (th->rst)
3441                                 sk->shutdown = SHUTDOWN_MASK;
3442                         break;
3443 
3444                 case TCP_CLOSE_WAIT:
3445                 case TCP_CLOSING:
3446                         /*
3447                          * received a retransmission of the FIN, do
3448                          * nothing.
3449                          */
3450                         break;
3451                 case TCP_TIME_WAIT:
3452                         /*
3453                          * received a retransmission of the FIN,
3454                          * restart the TIME_WAIT timer.
3455                          */
3456                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3457                         return(0);
3458                 case TCP_FIN_WAIT1:
3459                         /*
3460                          * This case occurs when a simultaneous close
3461                          * happens, we must ack the received FIN and
3462                          * enter the CLOSING state.
3463                          *
3464                          * XXX timeout not set properly
3465                          */
3466 
3467                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3468                         /*sk->fin_seq = th->seq+1;*/
3469                         tcp_set_state(sk,TCP_CLOSING);
3470                         break;
3471                 case TCP_FIN_WAIT2:
3472                         /*
3473                          * received a FIN -- send ACK and enter TIME_WAIT
3474                          */
3475                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3476                         /*sk->fin_seq = th->seq+1;*/
3477                         sk->shutdown|=SHUTDOWN_MASK;
3478                         tcp_set_state(sk,TCP_TIME_WAIT);
3479                         break;
3480                 case TCP_CLOSE:
3481                         /*
3482                          * already in CLOSE
3483                          */
3484                         break;
3485                 default:
3486                         tcp_set_state(sk,TCP_LAST_ACK);
3487         
3488                         /* Start the timers. */
3489                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3490                         return(0);
3491         }
3492         sk->ack_backlog++;
3493 
3494         return(0);
3495 }
3496 
3497 
3498 /* This will accept the next outstanding connection. */
3499 static struct sock *
3500 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3501 {
3502         struct sock *newsk;
3503         struct sk_buff *skb;
3504   
3505   /*
3506    * We need to make sure that this socket is listening,
3507    * and that it has something pending.
3508    */
3509 
3510         if (sk->state != TCP_LISTEN) 
3511         {
3512                 sk->err = EINVAL;
3513                 return(NULL); 
3514         }
3515 
3516         /* Avoid the race. */
3517         cli();
3518         sk->inuse = 1;
3519 
3520         while((skb = tcp_dequeue_established(sk)) == NULL) 
3521         {
3522                 if (flags & O_NONBLOCK) 
3523                 {
3524                         sti();
3525                         release_sock(sk);
3526                         sk->err = EAGAIN;
3527                         return(NULL);
3528                 }
3529 
3530                 release_sock(sk);
3531                 interruptible_sleep_on(sk->sleep);
3532                 if (current->signal & ~current->blocked) 
3533                 {
3534                         sti();
3535                         sk->err = ERESTARTSYS;
3536                         return(NULL);
3537                 }
3538                 sk->inuse = 1;
3539         }
3540         sti();
3541 
3542         /*
3543          *      Now all we need to do is return skb->sk. 
3544          */
3545 
3546         newsk = skb->sk;
3547 
3548         kfree_skb(skb, FREE_READ);
3549         sk->ack_backlog--;
3550         release_sock(sk);
3551         return(newsk);
3552 }
3553 
3554 
3555 /*
3556  *      This will initiate an outgoing connection. 
3557  */
3558  
3559 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3560 {
3561         struct sk_buff *buff;
3562         struct device *dev=NULL;
3563         unsigned char *ptr;
3564         int tmp;
3565         struct tcphdr *t1;
3566         struct rtable *rt;
3567 
3568         if (sk->state != TCP_CLOSE) 
3569                 return(-EISCONN);
3570 
3571         if (addr_len < 8) 
3572                 return(-EINVAL);
3573 
3574         if (usin->sin_family && usin->sin_family != AF_INET) 
3575                 return(-EAFNOSUPPORT);
3576 
3577         /*
3578          *      connect() to INADDR_ANY means loopback (BSD'ism).
3579          */
3580         
3581         if(usin->sin_addr.s_addr==INADDR_ANY)
3582                 usin->sin_addr.s_addr=ip_my_addr();
3583                   
3584         /*
3585          *      Don't want a TCP connection going to a broadcast address 
3586          */
3587 
3588         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3589         { 
3590                 return -ENETUNREACH;
3591         }
3592   
3593         /*
3594          *      Connect back to the same socket: Blows up so disallow it 
3595          */
3596 
3597         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3598                 return -EBUSY;
3599 
3600         sk->inuse = 1;
3601         sk->daddr = usin->sin_addr.s_addr;
3602         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3603         sk->window_seq = sk->write_seq;
3604         sk->rcv_ack_seq = sk->write_seq -1;
3605         sk->err = 0;
3606         sk->dummy_th.dest = usin->sin_port;
3607         release_sock(sk);
3608 
3609         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3610         if (buff == NULL) 
3611         {
3612                 return(-ENOMEM);
3613         }
3614         sk->inuse = 1;
3615         buff->len = 24;
3616         buff->sk = sk;
3617         buff->free = 1;
3618         buff->localroute = sk->localroute;
3619         
3620         t1 = (struct tcphdr *) buff->data;
3621 
3622         /*
3623          *      Put in the IP header and routing stuff. 
3624          */
3625          
3626         rt=ip_rt_route(sk->daddr, NULL, NULL);
3627         
3628 
3629         /*
3630          *      We need to build the routing stuff from the things saved in skb. 
3631          */
3632 
3633         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3634                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3635         if (tmp < 0) 
3636         {
3637                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3638                 release_sock(sk);
3639                 return(-ENETUNREACH);
3640         }
3641 
3642         buff->len += tmp;
3643         t1 = (struct tcphdr *)((char *)t1 +tmp);
3644 
3645         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3646         t1->seq = ntohl(sk->write_seq++);
3647         sk->sent_seq = sk->write_seq;
3648         buff->h.seq = sk->write_seq;
3649         t1->ack = 0;
3650         t1->window = 2;
3651         t1->res1=0;
3652         t1->res2=0;
3653         t1->rst = 0;
3654         t1->urg = 0;
3655         t1->psh = 0;
3656         t1->syn = 1;
3657         t1->urg_ptr = 0;
3658         t1->doff = 6;
3659         /* use 512 or whatever user asked for */
3660         
3661         if(rt!=NULL && (rt->rt_flags&RTF_WINDOW))
3662                 sk->window_clamp=rt->rt_window;
3663         else
3664                 sk->window_clamp=0;
3665 
3666         if (sk->user_mss)
3667                 sk->mtu = sk->user_mss;
3668         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
3669                 sk->mtu = rt->rt_mss;
3670         else 
3671         {
3672 #ifdef CONFIG_INET_SNARL
3673                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3674 #else
3675                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3676 #endif
3677                         sk->mtu = 576 - HEADER_SIZE;
3678                 else
3679                         sk->mtu = MAX_WINDOW;
3680         }
3681         /*
3682          *      but not bigger than device MTU 
3683          */
3684 
3685         if(sk->mtu <32)
3686                 sk->mtu = 32;   /* Sanity limit */
3687                 
3688         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3689         
3690         /*
3691          *      Put in the TCP options to say MTU. 
3692          */
3693 
3694         ptr = (unsigned char *)(t1+1);
3695         ptr[0] = 2;
3696         ptr[1] = 4;
3697         ptr[2] = (sk->mtu) >> 8;
3698         ptr[3] = (sk->mtu) & 0xff;
3699         tcp_send_check(t1, sk->saddr, sk->daddr,
3700                   sizeof(struct tcphdr) + 4, sk);
3701 
3702         /*
3703          *      This must go first otherwise a really quick response will get reset. 
3704          */
3705 
3706         tcp_set_state(sk,TCP_SYN_SENT);
3707 /*      sk->rtt = TCP_CONNECT_TIME;*/
3708         sk->rto = TCP_TIMEOUT_INIT;
3709         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3710         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3711 
3712         sk->prot->queue_xmit(sk, dev, buff, 0);  
3713         tcp_statistics.TcpActiveOpens++;
3714         tcp_statistics.TcpOutSegs++;
3715   
3716         release_sock(sk);
3717         return(0);
3718 }
3719 
3720 
3721 /* This functions checks to see if the tcp header is actually acceptable. */
3722 static int
3723 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3724              struct options *opt, unsigned long saddr, struct device *dev)
3725 {
3726         unsigned long next_seq;
3727 
3728         next_seq = len - 4*th->doff;
3729         if (th->fin)
3730                 next_seq++;
3731         /* if we have a zero window, we can't have any data in the packet.. */
3732         if (next_seq && !sk->window)
3733                 goto ignore_it;
3734         next_seq += th->seq;
3735 
3736         /*
3737          * This isn't quite right.  sk->acked_seq could be more recent
3738          * than sk->window.  This is however close enough.  We will accept
3739          * slightly more packets than we should, but it should not cause
3740          * problems unless someone is trying to forge packets.
3741          */
3742 
3743         /* have we already seen all of this packet? */
3744         if (!after(next_seq+1, sk->acked_seq))
3745                 goto ignore_it;
3746         /* or does it start beyond the window? */
3747         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3748                 goto ignore_it;
3749 
3750         /* ok, at least part of this packet would seem interesting.. */
3751         return 1;
3752 
3753 ignore_it:
3754         if (th->rst)
3755                 return 0;
3756 
3757         /*
3758          *      Send a reset if we get something not ours and we are
3759          *      unsynchronized. Note: We don't do anything to our end. We
3760          *      are just killing the bogus remote connection then we will
3761          *      connect again and it will work (with luck).
3762          */
3763          
3764         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3765                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3766                 return 1;
3767         }
3768 
3769         /* Try to resync things. */
3770         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3771         return 0;
3772 }
3773 
3774 
3775 #ifdef TCP_FASTPATH
3776 /*
3777  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3778  *      Yes if
3779  *      a) The queue is empty
3780  *      b) The last frame on the queue has the acked flag set
3781  */
3782 
3783 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3784 {
3785         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3786         if(skb==NULL || sk->receive_queue.prev->acked)
3787                 return 1;
3788 }
3789 
3790 #endif
3791 
3792 int
3793 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3794         unsigned long daddr, unsigned short len,
3795         unsigned long saddr, int redo, struct inet_protocol * protocol)
3796 {
3797         struct tcphdr *th;
3798         struct sock *sk;
3799 
3800         if (!skb) 
3801         {
3802                 return(0);
3803         }
3804 
3805         if (!dev) 
3806         {
3807                 return(0);
3808         }
3809   
3810         tcp_statistics.TcpInSegs++;
3811   
3812         if(skb->pkt_type!=PACKET_HOST)
3813         {
3814                 kfree_skb(skb,FREE_READ);
3815                 return(0);
3816         }
3817   
3818         th = skb->h.th;
3819 
3820         /*
3821          *      Find the socket.
3822          */
3823 
3824         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3825 
3826         /*
3827          *      If this socket has got a reset its to all intents and purposes 
3828          *      really dead 
3829          */
3830          
3831         if (sk!=NULL && sk->zapped)
3832                 sk=NULL;
3833 
3834         if (!redo) 
3835         {
3836                 if (tcp_check(th, len, saddr, daddr )) 
3837                 {
3838                         skb->sk = NULL;
3839                         kfree_skb(skb,FREE_READ);
3840                         /*
3841                          * We don't release the socket because it was
3842                          * never marked in use.
3843                          */
3844                         return(0);
3845                 }
3846                 th->seq = ntohl(th->seq);
3847 
3848                 /* See if we know about the socket. */
3849                 if (sk == NULL) 
3850                 {
3851                         if (!th->rst)
3852                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3853                         skb->sk = NULL;
3854                         kfree_skb(skb, FREE_READ);
3855                         return(0);
3856                 }
3857 
3858                 skb->len = len;
3859                 skb->sk = sk;
3860                 skb->acked = 0;
3861                 skb->used = 0;
3862                 skb->free = 0;
3863                 skb->saddr = daddr;
3864                 skb->daddr = saddr;
3865         
3866                 /* We may need to add it to the backlog here. */
3867                 cli();
3868                 if (sk->inuse) 
3869                 {
3870                         skb_queue_head(&sk->back_log, skb);
3871                         sti();
3872                         return(0);
3873                 }
3874                 sk->inuse = 1;
3875                 sti();
3876         }
3877         else
3878         {
3879                 if (!sk) 
3880                 {
3881                         return(0);
3882                 }
3883         }
3884 
3885 
3886         if (!sk->prot) 
3887         {
3888                 return(0);
3889         }
3890 
3891 
3892         /*
3893          *      Charge the memory to the socket. 
3894          */
3895          
3896         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3897         {
3898                 skb->sk = NULL;
3899                 kfree_skb(skb, FREE_READ);
3900                 release_sock(sk);
3901                 return(0);
3902         }
3903 
3904         sk->rmem_alloc += skb->mem_len;
3905 
3906 #ifdef TCP_FASTPATH
3907 /*
3908  *      Incoming data stream fastpath. 
3909  *
3910  *      We try to optimise two things.
3911  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3912  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3913  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3914  *
3915  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3916  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3917  *      speed although further optimizing here is possible.
3918  */
3919  
3920         /* I'm trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3921         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst)
3922         {       
3923                 /* Packets in order. Fits window */
3924                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3925                 {
3926                         /* Ack is harder */
3927                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3928                         {
3929                                 kfree_skb(skb, FREE_READ);
3930                                 release_sock(sk);
3931                                 return 0;
3932                         }
3933                         /*
3934                          *      Set up variables
3935                          */
3936                         skb->len -= (th->doff *4);
3937                         sk->bytes_rcv += skb->len;
3938                         tcp_rx_hit2++;
3939                         if(skb->len)
3940                         {
3941                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3942                                 if(sk->window >= skb->len)
3943                                         sk->window-=skb->len;                   /* We know its effect on the window */
3944                                 else
3945                                         sk->window=0;
3946                                 sk->acked_seq = th->seq+skb->len;       /* Easy */
3947                                 skb->acked=1;                           /* Guaranteed true */
3948                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3949                                         sk->bytes_rcv > sk->max_unacked)
3950                                 {
3951                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3952                                 }
3953                                 else
3954                                 {
3955                                         sk->ack_backlog++;
3956                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3957                                 }
3958                                 if(!sk->dead)
3959                                         sk->data_ready(sk,0);
3960                                 release_sock(sk);
3961                                 return 0;
3962                         }
3963                 }
3964                 /*
3965                  *      More generic case of arriving data stream in ESTABLISHED
3966                  */
3967                 tcp_rx_hit1++;
3968                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3969                 {
3970                         kfree_skb(skb, FREE_READ);
3971                         release_sock(sk);
3972                         return 0;
3973                 }
3974                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3975                 {
3976                         kfree_skb(skb, FREE_READ);
3977                         release_sock(sk);
3978                         return 0;
3979                 }
3980                 if(tcp_data(skb, sk, saddr, len))
3981                         kfree_skb(skb, FREE_READ);
3982                 release_sock(sk);
3983                 return 0;
3984         }
3985         tcp_rx_miss++;
3986 #endif  
3987 
3988         /*
3989          *      Now deal with all cases.
3990          */
3991          
3992         switch(sk->state) 
3993         {
3994         
3995                 /*
3996                  * This should close the system down if it's waiting
3997                  * for an ack that is never going to be sent.
3998                  */
3999                 case TCP_LAST_ACK:
4000                         if (th->rst) 
4001                         {
4002                                 sk->zapped=1;
4003                                 sk->err = ECONNRESET;
4004                                 tcp_set_state(sk,TCP_CLOSE);
4005                                 sk->shutdown = SHUTDOWN_MASK;
4006                                 if (!sk->dead) 
4007                                 {
4008                                         sk->state_change(sk);
4009                                 }
4010                                 kfree_skb(skb, FREE_READ);
4011                                 release_sock(sk);
4012                                 return(0);
4013                         }
4014 
4015                 case TCP_ESTABLISHED:
4016                 case TCP_CLOSE_WAIT:
4017                 case TCP_CLOSING:
4018                 case TCP_FIN_WAIT1:
4019                 case TCP_FIN_WAIT2:
4020                 case TCP_TIME_WAIT:
4021                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4022                         {
4023                                 kfree_skb(skb, FREE_READ);
4024                                 release_sock(sk);
4025                                 return(0);
4026                         }
4027 
4028                         if (th->rst) 
4029                         {
4030                                 tcp_statistics.TcpEstabResets++;
4031                                 sk->zapped=1;
4032                                 /* This means the thing should really be closed. */
4033                                 sk->err = ECONNRESET;
4034                                 if (sk->state == TCP_CLOSE_WAIT) 
4035                                 {
4036                                         sk->err = EPIPE;
4037                                 }
4038         
4039                                 /*
4040                                  * A reset with a fin just means that
4041                                  * the data was not all read.
4042                                  */
4043                                 tcp_set_state(sk,TCP_CLOSE);
4044                                 sk->shutdown = SHUTDOWN_MASK;
4045                                 if (!sk->dead) 
4046                                 {
4047                                         sk->state_change(sk);
4048                                 }
4049                                 kfree_skb(skb, FREE_READ);
4050                                 release_sock(sk);
4051                                 return(0);
4052                         }
4053                         if (th->syn) 
4054                         {
4055                                 tcp_statistics.TcpEstabResets++;
4056                                 sk->err = ECONNRESET;
4057                                 tcp_set_state(sk,TCP_CLOSE);
4058                                 sk->shutdown = SHUTDOWN_MASK;
4059                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
4060                                 if (!sk->dead) {
4061                                         sk->state_change(sk);
4062                                 }
4063                                 kfree_skb(skb, FREE_READ);
4064                                 release_sock(sk);
4065                                 return(0);
4066                         }
4067         
4068                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
4069                                 kfree_skb(skb, FREE_READ);
4070                                 release_sock(sk);
4071                                 return(0);
4072                         }
4073         
4074                         if (tcp_urg(sk, th, saddr, len)) {
4075                                 kfree_skb(skb, FREE_READ);
4076                                 release_sock(sk);
4077                                 return(0);
4078                         }
4079 
4080         
4081                         if (tcp_data(skb, sk, saddr, len)) {
4082                                 kfree_skb(skb, FREE_READ);
4083                                 release_sock(sk);
4084                                 return(0);
4085                         }       
4086 
4087                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
4088                                 kfree_skb(skb, FREE_READ);
4089                                 release_sock(sk);
4090                                 return(0);
4091                         }
4092         
4093                         release_sock(sk);
4094                         return(0);
4095                 
4096                 case TCP_CLOSE:
4097                         if (sk->dead || sk->daddr) {
4098                                 kfree_skb(skb, FREE_READ);
4099                                         release_sock(sk);
4100                                 return(0);
4101                         }
4102         
4103                         if (!th->rst) {
4104                                 if (!th->ack)
4105                                         th->ack_seq = 0;
4106                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4107                         }
4108                         kfree_skb(skb, FREE_READ);
4109                         release_sock(sk);
4110                                 return(0);
4111         
4112                 case TCP_LISTEN:
4113                         if (th->rst) {
4114                                 kfree_skb(skb, FREE_READ);
4115                                 release_sock(sk);
4116                                 return(0);
4117                         }
4118                         if (th->ack) {
4119                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4120                                 kfree_skb(skb, FREE_READ);
4121                                 release_sock(sk);
4122                                 return(0);
4123                         }
4124         
4125                         if (th->syn) 
4126                         {
4127                                 /*
4128                                  * Now we just put the whole thing including
4129                                  * the header and saddr, and protocol pointer
4130                                  * into the buffer.  We can't respond until the
4131                                  * user tells us to accept the connection.
4132                                  */
4133                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
4134                                 release_sock(sk);
4135                                 return(0);
4136                         }
4137 
4138                         kfree_skb(skb, FREE_READ);
4139                         release_sock(sk);
4140                         return(0);
4141 
4142                 case TCP_SYN_RECV:
4143                         if (th->syn) {
4144                                 /* Probably a retransmitted syn */
4145                                 kfree_skb(skb, FREE_READ);
4146                                 release_sock(sk);
4147                                 return(0);
4148                         }
4149         
4150         
4151                 default:
4152                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
4153                         {
4154                                 kfree_skb(skb, FREE_READ);
4155                                 release_sock(sk);
4156                                 return(0);
4157                         }
4158         
4159                 case TCP_SYN_SENT:
4160                         if (th->rst) 
4161                         {
4162                                 tcp_statistics.TcpAttemptFails++;
4163                                 sk->err = ECONNREFUSED;
4164                                 tcp_set_state(sk,TCP_CLOSE);
4165                                 sk->shutdown = SHUTDOWN_MASK;
4166                                 sk->zapped = 1;
4167                                 if (!sk->dead) 
4168                                 {
4169                                         sk->state_change(sk);
4170                                 }
4171                                 kfree_skb(skb, FREE_READ);
4172                                 release_sock(sk);
4173                                 return(0);
4174                         }
4175                         if (!th->ack) 
4176                         {
4177                                 if (th->syn) 
4178                                 {
4179                                         tcp_set_state(sk,TCP_SYN_RECV);
4180                                 }
4181                                 kfree_skb(skb, FREE_READ);
4182                                 release_sock(sk);
4183                                 return(0);
4184                         }
4185         
4186                         switch(sk->state) 
4187                         {
4188                                 case TCP_SYN_SENT:
4189                                         if (!tcp_ack(sk, th, saddr, len)) 
4190                                         {
4191                                                 tcp_statistics.TcpAttemptFails++;
4192                                                 tcp_reset(daddr, saddr, th,
4193                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4194                                                 kfree_skb(skb, FREE_READ);
4195                                                         release_sock(sk);
4196                                                 return(0);
4197                                         }
4198         
4199                                         /*
4200                                          * If the syn bit is also set, switch to
4201                                          * tcp_syn_recv, and then to established.
4202                                          */
4203                                         if (!th->syn) 
4204                                         {
4205                                                 kfree_skb(skb, FREE_READ);
4206                                                 release_sock(sk);
4207                                                 return(0);
4208                                         }
4209         
4210                                         /* Ack the syn and fall through. */
4211                                         sk->acked_seq = th->seq+1;
4212                                         sk->fin_seq = th->seq;
4213                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4214                                                 sk, th, sk->daddr);
4215                 
4216                                 case TCP_SYN_RECV:
4217                                         if (!tcp_ack(sk, th, saddr, len)) 
4218                                         {
4219                                                 tcp_statistics.TcpAttemptFails++;
4220                                                 tcp_reset(daddr, saddr, th,
4221                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4222                                                 kfree_skb(skb, FREE_READ);
4223                                                 release_sock(sk);
4224                                                 return(0);
4225                                         }
4226         
4227                                         tcp_set_state(sk,TCP_ESTABLISHED);
4228         
4229                                         /*
4230                                          *      Now we need to finish filling out
4231                                          *      some of the tcp header.
4232                                          * 
4233                                          *      We need to check for mtu info. 
4234                                          */
4235                                         tcp_options(sk, th);
4236                                         sk->dummy_th.dest = th->source;
4237                                         sk->copied_seq = sk->acked_seq-1;
4238                                         if (!sk->dead) 
4239                                         {
4240                                                 sk->state_change(sk);
4241                                         }
4242         
4243                                         /*
4244                                          * We've already processed his first
4245                                          * ack.  In just about all cases that
4246                                          * will have set max_window.  This is
4247                                          * to protect us against the possibility
4248                                          * that the initial window he sent was 0.
4249                                          * This must occur after tcp_options, which
4250                                          * sets sk->mtu.
4251                                          */
4252                                         if (sk->max_window == 0) 
4253                                         {
4254                                                 sk->max_window = 32;
4255                                                 sk->mss = min(sk->max_window, sk->mtu);
4256                                         }
4257 
4258                                         /*
4259                                          * Now process the rest like we were
4260                                          * already in the established state.
4261                                          */
4262                                         if (th->urg) 
4263                                         {
4264                                                 if (tcp_urg(sk, th, saddr, len)) 
4265                                                 { 
4266                                                         kfree_skb(skb, FREE_READ);
4267                                                         release_sock(sk);
4268                                                         return(0);
4269                                                 }
4270                                         }
4271                                         if (tcp_data(skb, sk, saddr, len))
4272                                                 kfree_skb(skb, FREE_READ);
4273 
4274                                         if (th->fin)
4275                                                 tcp_fin(skb, sk, th, saddr, dev);
4276                                         release_sock(sk);
4277                                         return(0);
4278                         }
4279         
4280                         if (th->urg) 
4281                         {
4282                                 if (tcp_urg(sk, th, saddr, len)) 
4283                                 {
4284                                         kfree_skb(skb, FREE_READ);
4285                                         release_sock(sk);
4286                                         return(0);
4287                                 }
4288                         }
4289                         if (tcp_data(skb, sk, saddr, len)) 
4290                         {
4291                                 kfree_skb(skb, FREE_READ);
4292                                 release_sock(sk);
4293                                 return(0);
4294                         }
4295         
4296                         if (!th->fin) 
4297                         {
4298                                 release_sock(sk);
4299                                 return(0);
4300                         }
4301                         tcp_fin(skb, sk, th, saddr, dev);
4302                         release_sock(sk);
4303                         return(0);
4304         }
4305 }
4306 
4307 
4308 /*
4309  * This routine sends a packet with an out of date sequence
4310  * number. It assumes the other end will try to ack it.
4311  */
4312 
4313 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4314 {
4315         struct sk_buff *buff;
4316         struct tcphdr *t1;
4317         struct device *dev=NULL;
4318         int tmp;
4319 
4320         if (sk->zapped)
4321                 return; /* After a valid reset we can send no more */
4322 
4323         /*
4324          * Write data can still be transmitted/retransmitted in the
4325          * following states.  If any other state is encountered, return.
4326          */
4327 
4328         if (sk->state != TCP_ESTABLISHED && 
4329             sk->state != TCP_CLOSE_WAIT &&
4330             sk->state != TCP_FIN_WAIT1 && 
4331             sk->state != TCP_LAST_ACK &&
4332             sk->state != TCP_CLOSING
4333         ) {
4334                 return;
4335         }
4336 
4337         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4338         if (buff == NULL) 
4339                 return;
4340 
4341         buff->len = sizeof(struct tcphdr);
4342         buff->free = 1;
4343         buff->sk = sk;
4344         buff->localroute = sk->localroute;
4345 
4346         t1 = (struct tcphdr *) buff->data;
4347 
4348         /* Put in the IP header and routing stuff. */
4349         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4350                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4351         if (tmp < 0) 
4352         {
4353                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4354                 return;
4355         }
4356 
4357         buff->len += tmp;
4358         t1 = (struct tcphdr *)((char *)t1 +tmp);
4359 
4360         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4361 
4362         /*
4363          * Use a previous sequence.
4364          * This should cause the other end to send an ack.
4365          */
4366         t1->seq = htonl(sk->sent_seq-1);
4367         t1->ack = 1; 
4368         t1->res1= 0;
4369         t1->res2= 0;
4370         t1->rst = 0;
4371         t1->urg = 0;
4372         t1->psh = 0;
4373         t1->fin = 0;
4374         t1->syn = 0;
4375         t1->ack_seq = ntohl(sk->acked_seq);
4376         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4377         t1->doff = sizeof(*t1)/4;
4378         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4379 
4380          /*     Send it and free it.
4381           *     This will prevent the timer from automatically being restarted.
4382           */
4383         sk->prot->queue_xmit(sk, dev, buff, 1);
4384         tcp_statistics.TcpOutSegs++;
4385 }
4386 
4387 void
4388 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4389 {
4390         if (sk->zapped)
4391                 return;         /* After a valid reset we can send no more */
4392 
4393         tcp_write_wakeup(sk);
4394 
4395         sk->backoff++;
4396         sk->rto = min(sk->rto << 1, 120*HZ);
4397         reset_timer (sk, TIME_PROBE0, sk->rto);
4398         sk->retransmits++;
4399         sk->prot->retransmits ++;
4400 }
4401 
4402 /*
4403  *      Socket option code for TCP. 
4404  */
4405   
4406 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4407 {
4408         int val,err;
4409 
4410         if(level!=SOL_TCP)
4411                 return ip_setsockopt(sk,level,optname,optval,optlen);
4412 
4413         if (optval == NULL) 
4414                 return(-EINVAL);
4415 
4416         err=verify_area(VERIFY_READ, optval, sizeof(int));
4417         if(err)
4418                 return err;
4419         
4420         val = get_fs_long((unsigned long *)optval);
4421 
4422         switch(optname)
4423         {
4424                 case TCP_MAXSEG:
4425 /*                      if(val<200||val>2048 || val>sk->mtu) */
4426 /*
4427  * values greater than interface MTU won't take effect.  however at
4428  * the point when this call is done we typically don't yet know
4429  * which interface is going to be used
4430  */
4431                         if(val<1||val>MAX_WINDOW)
4432                                 return -EINVAL;
4433                         sk->user_mss=val;
4434                         return 0;
4435                 case TCP_NODELAY:
4436                         sk->nonagle=(val==0)?0:1;
4437                         return 0;
4438                 default:
4439                         return(-ENOPROTOOPT);
4440         }
4441 }
4442 
4443 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4444 {
4445         int val,err;
4446 
4447         if(level!=SOL_TCP)
4448                 return ip_getsockopt(sk,level,optname,optval,optlen);
4449                         
4450         switch(optname)
4451         {
4452                 case TCP_MAXSEG:
4453                         val=sk->user_mss;
4454                         break;
4455                 case TCP_NODELAY:
4456                         val=sk->nonagle;        /* Until Johannes stuff is in */
4457                         break;
4458                 default:
4459                         return(-ENOPROTOOPT);
4460         }
4461         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4462         if(err)
4463                 return err;
4464         put_fs_long(sizeof(int),(unsigned long *) optlen);
4465 
4466         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4467         if(err)
4468                 return err;
4469         put_fs_long(val,(unsigned long *)optval);
4470 
4471         return(0);
4472 }       
4473 
4474 
4475 struct proto tcp_prot = {
4476         sock_wmalloc,
4477         sock_rmalloc,
4478         sock_wfree,
4479         sock_rfree,
4480         sock_rspace,
4481         sock_wspace,
4482         tcp_close,
4483         tcp_read,
4484         tcp_write,
4485         tcp_sendto,
4486         tcp_recvfrom,
4487         ip_build_header,
4488         tcp_connect,
4489         tcp_accept,
4490         ip_queue_xmit,
4491         tcp_retransmit,
4492         tcp_write_wakeup,
4493         tcp_read_wakeup,
4494         tcp_rcv,
4495         tcp_select,
4496         tcp_ioctl,
4497         NULL,
4498         tcp_shutdown,
4499         tcp_setsockopt,
4500         tcp_getsockopt,
4501         128,
4502         0,
4503         {NULL,},
4504         "TCP"
4505 };

/* [previous][next][first][last][top][bottom][index][help] */