root/net/inet/tcp.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. min
  2. tcp_select_window
  3. tcp_time_wait
  4. tcp_retransmit
  5. tcp_err
  6. tcp_readable
  7. tcp_select
  8. tcp_ioctl
  9. tcp_check
  10. tcp_send_check
  11. tcp_send_skb
  12. tcp_dequeue_partial
  13. tcp_send_partial
  14. tcp_enqueue_partial
  15. tcp_send_ack
  16. tcp_build_header
  17. tcp_write
  18. tcp_sendto
  19. tcp_read_wakeup
  20. cleanup_rbuf
  21. tcp_read_urg
  22. tcp_read
  23. tcp_shutdown
  24. tcp_recvfrom
  25. tcp_reset
  26. tcp_options
  27. default_mask
  28. tcp_conn_request
  29. tcp_close
  30. tcp_write_xmit
  31. sort_send
  32. tcp_ack
  33. tcp_data
  34. tcp_check_urg
  35. tcp_urg
  36. tcp_fin
  37. tcp_accept
  38. tcp_connect
  39. tcp_sequence
  40. tcp_clean_end
  41. tcp_rcv
  42. tcp_write_wakeup
  43. tcp_send_probe0
  44. tcp_setsockopt
  45. tcp_getsockopt

   1 /*
   2  * INET         An implementation of the TCP/IP protocol suite for the LINUX
   3  *              operating system.  INET is implemented using the  BSD Socket
   4  *              interface as the means of communication with the user level.
   5  *
   6  *              Implementation of the Transmission Control Protocol(TCP).
   7  *
   8  * Version:     @(#)tcp.c       1.0.16  05/25/93
   9  *
  10  * Authors:     Ross Biro, <bir7@leland.Stanford.Edu>
  11  *              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  12  *              Mark Evans, <evansmp@uhura.aston.ac.uk>
  13  *              Corey Minyard <wf-rch!minyard@relay.EU.net>
  14  *              Florian La Roche, <flla@stud.uni-sb.de>
  15  *              Charles Hedrick, <hedrick@klinzhai.rutgers.edu>
  16  *              Linus Torvalds, <torvalds@cs.helsinki.fi>
  17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
  18  *              Matthew Dillon, <dillon@apollo.west.oic.com>
  19  *
  20  * Fixes:       
  21  *              Alan Cox        :       Numerous verify_area() calls
  22  *              Alan Cox        :       Set the ACK bit on a reset
  23  *              Alan Cox        :       Stopped it crashing if it closed while sk->inuse=1
  24  *                                      and was trying to connect (tcp_err()).
  25  *              Alan Cox        :       All icmp error handling was broken
  26  *                                      pointers passed where wrong and the
  27  *                                      socket was looked up backwards. Nobody
  28  *                                      tested any icmp error code obviously.
  29  *              Alan Cox        :       tcp_err() now handled properly. It wakes people
  30  *                                      on errors. select behaves and the icmp error race
  31  *                                      has gone by moving it into sock.c
  32  *              Alan Cox        :       tcp_reset() fixed to work for everything not just
  33  *                                      packets for unknown sockets.
  34  *              Alan Cox        :       tcp option processing.
  35  *              Alan Cox        :       Reset tweaked (still not 100%) [Had syn rule wrong]
  36  *              Herp Rosmanith  :       More reset fixes
  37  *              Alan Cox        :       No longer acks invalid rst frames. Acking
  38  *                                      any kind of RST is right out.
  39  *              Alan Cox        :       Sets an ignore me flag on an rst receive
  40  *                                      otherwise odd bits of prattle escape still
  41  *              Alan Cox        :       Fixed another acking RST frame bug. Should stop
  42  *                                      LAN workplace lockups.
  43  *              Alan Cox        :       Some tidyups using the new skb list facilities
  44  *              Alan Cox        :       sk->keepopen now seems to work
  45  *              Alan Cox        :       Pulls options out correctly on accepts
  46  *              Alan Cox        :       Fixed assorted sk->rqueue->next errors
  47  *              Alan Cox        :       PSH doesn't end a TCP read. Switched a bit to skb ops.
  48  *              Alan Cox        :       Tidied tcp_data to avoid a potential nasty.
  49  *              Alan Cox        :       Added some better commenting, as the tcp is hard to follow
  50  *              Alan Cox        :       Removed incorrect check for 20 * psh
  51  *      Michael O'Reilly        :       ack < copied bug fix.
  52  *      Johannes Stille         :       Misc tcp fixes (not all in yet).
  53  *              Alan Cox        :       FIN with no memory -> CRASH
  54  *              Alan Cox        :       Added socket option proto entries. Also added awareness of them to accept.
  55  *              Alan Cox        :       Added TCP options (SOL_TCP)
  56  *              Alan Cox        :       Switched wakeup calls to callbacks, so the kernel can layer network sockets.
  57  *              Alan Cox        :       Use ip_tos/ip_ttl settings.
  58  *              Alan Cox        :       Handle FIN (more) properly (we hope).
  59  *              Alan Cox        :       RST frames sent on unsynchronised state ack error/
  60  *              Alan Cox        :       Put in missing check for SYN bit.
  61  *              Alan Cox        :       Added tcp_select_window() aka NET2E 
  62  *                                      window non shrink trick.
  63  *              Alan Cox        :       Added a couple of small NET2E timer fixes
  64  *              Charles Hedrick :       TCP fixes
  65  *              Toomas Tamm     :       TCP window fixes
  66  *              Alan Cox        :       Small URG fix to rlogin ^C ack fight
  67  *              Charles Hedrick :       Rewrote most of it to actually work
  68  *              Linus           :       Rewrote tcp_read() and URG handling
  69  *                                      completely
  70  *              Gerhard Koerting:       Fixed some missing timer handling
  71  *              Matthew Dillon  :       Reworked TCP machine states as per RFC
  72  *              Gerhard Koerting:       PC/TCP workarounds
  73  *              Adam Caldwell   :       Assorted timer/timing errors
  74  *              Matthew Dillon  :       Fixed another RST bug
  75  *              Alan Cox        :       Move to kernel side addressing changes.
  76  *              Alan Cox        :       Beginning work on TCP fastpathing (not yet usable)
  77  *
  78  *
  79  * To Fix:
  80  *                      Possibly a problem with accept(). BSD accept never fails after
  81  *              it causes a select. Linux can - given the official select semantics I
  82  *              feel that _really_ its the BSD network programs that are bust (notably
  83  *              inetd, which hangs occasionally because of this).
  84  *
  85  *                      Fast path the code. Two things here - fix the window calculation
  86  *              so it doesn't iterate over the queue, also spot packets with no funny
  87  *              options arriving in order and process directly.
  88  *                      Any assembler hacker who can speed up the checksum routines will
  89  *              be welcome as well as someone who feels like writing a single 'checksum udp
  90  *              and copy up to user mode for the first n bytes at the same time' routine.
  91  *              which should be quicker than the current sum then copy for the UDP layer.
  92  *
  93  *              This program is free software; you can redistribute it and/or
  94  *              modify it under the terms of the GNU General Public License
  95  *              as published by the Free Software Foundation; either version
  96  *              2 of the License, or(at your option) any later version.
  97  *
  98  * Description of States:
  99  *
 100  *      TCP_SYN_SENT            sent a connection request, waiting for ack
 101  *
 102  *      TCP_SYN_RECV            received a connection request, sent ack,
 103  *                              waiting for final ack in three-way handshake.
 104  *
 105  *      TCP_ESTABLISHED         connection established
 106  *
 107  *      TCP_FIN_WAIT1           our side has shutdown, waiting to complete
 108  *                              transmission of remaining buffered data
 109  *
 110  *      TCP_FIN_WAIT2           all buffered data sent, waiting for remote
 111  *                              to shutdown
 112  *
 113  *      TCP_CLOSING             both sides have shutdown but we still have
 114  *                              data we have to finish sending
 115  *
 116  *      TCP_TIME_WAIT           timeout to catch resent junk before entering
 117  *                              closed, can only be entered from FIN_WAIT2
 118  *                              or CLOSING.  Required because the other end
 119  *                              may not have gotten our last ACK causing it
 120  *                              to retransmit the data packet (which we ignore)
 121  *
 122  *      TCP_CLOSE_WAIT          remote side has shutdown and is waiting for
 123  *                              us to finish writing our data and to shutdown
 124  *                              (we have to close() to move on to LAST_ACK)
 125  *
 126  *      TCP_LAST_ACK            our side has shutdown after remote has
 127  *                              shutdown.  There may still be data in our
 128  *                              buffer that we have to finish sending
 129  *              
 130  *      TCP_CLOSED              socket is finished
 131  */
 132 #include <linux/types.h>
 133 #include <linux/sched.h>
 134 #include <linux/mm.h>
 135 #include <linux/string.h>
 136 #include <linux/socket.h>
 137 #include <linux/sockios.h>
 138 #include <linux/termios.h>
 139 #include <linux/in.h>
 140 #include <linux/fcntl.h>
 141 #include <linux/inet.h>
 142 #include <linux/netdevice.h>
 143 #include "snmp.h"
 144 #include "ip.h"
 145 #include "protocol.h"
 146 #include "icmp.h"
 147 #include "tcp.h"
 148 #include <linux/skbuff.h>
 149 #include "sock.h"
 150 #include "route.h"
 151 #include <linux/errno.h>
 152 #include <linux/timer.h>
 153 #include <asm/system.h>
 154 #include <asm/segment.h>
 155 #include <linux/mm.h>
 156 
 157 #undef TCP_FASTPATH
     /* TCP_FASTPATH is forced off above, so the #ifdef TCP_FASTPATH block below is compiled out. */
 158 
     /* NOTE(review): neither SEQ_TICK nor seq_offset is used in the part of the
        file visible here; presumably initial sequence number bookkeeping - confirm. */
 159 #define SEQ_TICK 3
 160 unsigned long seq_offset;
     /* Protocol-wide counters; this file bumps e.g. TcpAttemptFails and TcpOutSegs. */
 161 struct tcp_mib  tcp_statistics;
 162 
 163 #ifdef TCP_FASTPATH
     /* Only present in TCP_FASTPATH builds.
        NOTE(review): presumably receive fast-path hit/miss counters - confirm. */
 164 unsigned long tcp_rx_miss=0, tcp_rx_hit1=0, tcp_rx_hit2=0;
 165 #endif
 166 
 167 
 168 static __inline__ int min(unsigned int a, unsigned int b)
     /* [previous][next][first][last][top][bottom][index][help] */
 169 {
 170         if (a < b) 
 171                 return(a);
 172         return(b);
 173 }
 174 
 175 
 176 /* This routine picks a TCP window for a socket based on
 177    the following constraints
 178    
 179    1. The window can never be shrunk once it is offered (RFC 793)
 180    2. We limit memory per socket
 181    
 182    For now we use NET2E3's heuristic of offering half the memory
 183    we have handy. All is not as bad as this seems however because
 184    of two things. Firstly we will bin packets even within the window
 185    in order to get the data we are waiting for into the memory limit.
 186    Secondly we bin common duplicate forms at receive time
 187 
 188    Better heuristics welcome
 189 */
 190    
 191 int tcp_select_window(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 192 {
 193         int new_window = sk->prot->rspace(sk);
 194         
 195 /*
 196  * two things are going on here.  First, we don't ever offer a
 197  * window less than min(sk->mss, MAX_WINDOW/2).  This is the
 198  * receiver side of SWS as specified in RFC1122.
 199  * Second, we always give them at least the window they
 200  * had before, in order to avoid retracting window.  This
 201  * is technically allowed, but RFC1122 advises against it and
 202  * in practice it causes trouble.
 203  */
 204         if (new_window < min(sk->mss, MAX_WINDOW/2) || new_window < sk->window)
 205                 return(sk->window);
 206         return(new_window);
 207 }
 208 
 209 /*
 210  *      Enter the time wait state. 
 211  */
 212 
 213 static void tcp_time_wait(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 214 {
 215         sk->state = TCP_TIME_WAIT;
 216         sk->shutdown = SHUTDOWN_MASK;
 217         if (!sk->dead)
 218                 sk->state_change(sk);
 219         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
 220 }
 221 
 222 /*
 223  *      A timer event has triggered a tcp retransmit timeout. The
 224  *      socket xmit queue is ready and set up to send. Because
 225  *      the ack receive code keeps the queue straight we do
 226  *      nothing clever here.
 227  */
 228 
 229 static void tcp_retransmit(struct sock *sk, int all)
     /* [previous][next][first][last][top][bottom][index][help] */
 230 {
 231         if (all) 
 232         {
 233                 ip_retransmit(sk, all);
 234                 return;
 235         }
 236 
 237         sk->ssthresh = sk->cong_window >> 1; /* remember window where we lost */
 238         /* sk->ssthresh in theory can be zero.  I guess that's OK */
 239         sk->cong_count = 0;
 240 
 241         sk->cong_window = 1;
 242 
 243         /* Do the actual retransmit. */
 244         ip_retransmit(sk, all);
 245 }
 246 
 247 
 248 /*
 249  * This routine is called by the ICMP module when it gets some
 250  * sort of error condition.  If err < 0 then the socket should
 251  * be closed and the error returned to the user.  If err > 0
 252  * it's just the icmp type << 8 | icmp code.  After adjustment
 253  * header points to the first 8 bytes of the tcp header.  We need
 254  * to find the appropriate port.
 255  */
 256 
 257 void tcp_err(int err, unsigned char *header, unsigned long daddr,
     /* [previous][next][first][last][top][bottom][index][help] */
 258         unsigned long saddr, struct inet_protocol *protocol)
 259 {
 260         struct tcphdr *th;
 261         struct sock *sk;
 262         struct iphdr *iph=(struct iphdr *)header;
 263   
 264         header+=4*iph->ihl;
 265    
 266 
 267         th =(struct tcphdr *)header;
 268         sk = get_sock(&tcp_prot, th->source/*dest*/, daddr, th->dest/*source*/, saddr);
 269 
 270         if (sk == NULL) 
 271                 return;
 272   
 273         if(err<0)
 274         {
 275                 sk->err = -err;
 276                 sk->error_report(sk);
 277                 return;
 278         }
 279 
 280         if ((err & 0xff00) == (ICMP_SOURCE_QUENCH << 8)) 
 281         {
 282                 /*
 283                  * FIXME:
 284                  * For now we will just trigger a linear backoff.
 285                  * The slow start code should cause a real backoff here.
 286                  */
 287                 if (sk->cong_window > 4)
 288                         sk->cong_window--;
 289                 return;
 290         }
 291 
 292 /*      sk->err = icmp_err_convert[err & 0xff].errno;  -- moved as TCP should hide non fatals internally (and does) */
 293 
 294         /*
 295          * If we've already connected we will keep trying
 296          * until we time out, or the user gives up.
 297          */
 298 
 299         if (icmp_err_convert[err & 0xff].fatal) 
 300         {
 301                 if (sk->state == TCP_SYN_SENT) 
 302                 {
 303                         tcp_statistics.TcpAttemptFails++;
 304                         sk->state = TCP_CLOSE;
 305                         sk->error_report(sk);           /* Wake people up to see the error (see connect in sock.c) */
 306                 }
 307                 sk->err = icmp_err_convert[err & 0xff].errno;           
 308         }
 309         return;
 310 }
 311 
 312 
 313 /*
 314  *      Walk down the receive queue counting readable data until we hit the end or we find a gap
 315  *      in the received data queue (ie a frame missing that needs sending to us)
 316  */
 317 
 318 static int tcp_readable(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 319 {
 320         unsigned long counted;
 321         unsigned long amount;
 322         struct sk_buff *skb;
 323         int sum;
 324         unsigned long flags;
 325 
 326         if(sk && sk->debug)
 327                 printk("tcp_readable: %p - ",sk);
 328 
 329         save_flags(flags);
 330         cli();
 331         if (sk == NULL || (skb = skb_peek(&sk->receive_queue)) == NULL)
 332         {
 333                 restore_flags(flags);
 334                 if(sk && sk->debug) 
 335                         printk("empty\n");
 336                 return(0);
 337         }
 338   
 339         counted = sk->copied_seq+1;     /* Where we are at the moment */
 340         amount = 0;
 341   
 342         /* Do until a push or until we are out of data. */
 343         do 
 344         {
 345                 if (before(counted, skb->h.th->seq))    /* Found a hole so stops here */
 346                         break;
 347                 sum = skb->len -(counted - skb->h.th->seq);     /* Length - header but start from where we are up to (avoid overlaps) */
 348                 if (skb->h.th->syn)
 349                         sum++;
 350                 if (sum >= 0) 
 351                 {                                       /* Add it up, move on */
 352                         amount += sum;
 353                         if (skb->h.th->syn) 
 354                                 amount--;
 355                         counted += sum;
 356                 }
 357                 if (amount && skb->h.th->psh) break;
 358                 skb = skb->next;
 359         }
 360         while(skb != (struct sk_buff *)&sk->receive_queue);
 361 
 362         if (amount && !sk->urginline && sk->urg_data &&
 363             (sk->urg_seq - sk->copied_seq) <= (counted - sk->copied_seq))
 364                 amount--;               /* don't count urg data */
 365         restore_flags(flags);
 366         if(sk->debug)
 367                 printk("got %lu bytes.\n",amount);
 368         return(amount);
 369 }
 370 
 371 
 372 /*
 373  *      Wait for a TCP event. Note the oddity with SEL_IN and reading. The
 374  *      listening socket has a receive queue of sockets to accept.
 375  */
 376 
 377 static int tcp_select(struct sock *sk, int sel_type, select_table *wait)
     /* [previous][next][first][last][top][bottom][index][help] */
 378 {
 379         sk->inuse = 1;
 380 
 381         switch(sel_type) 
 382         {
 383                 case SEL_IN:
 384                         if(sk->debug)
 385                                 printk("select in");
 386                         select_wait(sk->sleep, wait);
 387                         if(sk->debug)
 388                                 printk("-select out");
 389                         if (skb_peek(&sk->receive_queue) != NULL) 
 390                         {
 391                                 if (sk->state == TCP_LISTEN || tcp_readable(sk)) 
 392                                 {
 393                                         release_sock(sk);
 394                                         if(sk->debug)
 395                                                 printk("-select ok data\n");
 396                                         return(1);
 397                                 }
 398                         }
 399                         if (sk->err != 0)       /* Receiver error */
 400                         {
 401                                 release_sock(sk);
 402                                 if(sk->debug)
 403                                         printk("-select ok error");
 404                                 return(1);
 405                         }
 406                         if (sk->shutdown & RCV_SHUTDOWN) 
 407                         {
 408                                 release_sock(sk);
 409                                 if(sk->debug)
 410                                         printk("-select ok down\n");
 411                                 return(1);
 412                         } 
 413                         else 
 414                         {
 415                                 release_sock(sk);
 416                                 if(sk->debug)
 417                                         printk("-select fail\n");
 418                                 return(0);
 419                         }
 420                 case SEL_OUT:
 421                         select_wait(sk->sleep, wait);
 422                         if (sk->shutdown & SEND_SHUTDOWN) 
 423                         {
 424                                 /* FIXME: should this return an error? */
 425                                 release_sock(sk);
 426                                 return(0);
 427                         }
 428 
 429                         /*
 430                          * FIXME:
 431                          * Hack so it will probably be able to write
 432                          * something if it says it's ok to write.
 433                          */
 434                         
 435                         if (sk->prot->wspace(sk) >= sk->mss) 
 436                         {
 437                                 release_sock(sk);
 438                                 /* This should cause connect to work ok. */
 439                                 if (sk->state == TCP_SYN_RECV ||
 440                                     sk->state == TCP_SYN_SENT) return(0);
 441                                 return(1);
 442                         }
 443                         release_sock(sk);
 444                         return(0);
 445                 case SEL_EX:
 446                         select_wait(sk->sleep,wait);
 447                         if (sk->err || sk->urg_data) 
 448                         {
 449                                 release_sock(sk);
 450                                 return(1);
 451                         }
 452                         release_sock(sk);
 453                         return(0);
 454         }
 455 
 456         release_sock(sk);
 457         return(0);
 458 }
 459 
 460 
 461 int tcp_ioctl(struct sock *sk, int cmd, unsigned long arg)
     /* [previous][next][first][last][top][bottom][index][help] */
 462 {
 463         int err;
 464         switch(cmd) 
 465         {
 466 
 467                 case TIOCINQ:
 468 #ifdef FIXME    /* FIXME: */
 469                 case FIONREAD:
 470 #endif
 471                 {
 472                         unsigned long amount;
 473 
 474                         if (sk->state == TCP_LISTEN) 
 475                                 return(-EINVAL);
 476 
 477                         sk->inuse = 1;
 478                         amount = tcp_readable(sk);
 479                         release_sock(sk);
 480                         err=verify_area(VERIFY_WRITE,(void *)arg,
 481                                                    sizeof(unsigned long));
 482                         if(err)
 483                                 return err;
 484                         put_fs_long(amount,(unsigned long *)arg);
 485                         return(0);
 486                 }
 487                 case SIOCATMARK:
 488                 {
 489                         int answ = sk->urg_data && sk->urg_seq == sk->copied_seq+1;
 490 
 491                         err = verify_area(VERIFY_WRITE,(void *) arg,
 492                                                   sizeof(unsigned long));
 493                         if (err)
 494                                 return err;
 495                         put_fs_long(answ,(int *) arg);
 496                         return(0);
 497                 }
 498                 case TIOCOUTQ:
 499                 {
 500                         unsigned long amount;
 501 
 502                         if (sk->state == TCP_LISTEN) return(-EINVAL);
 503                         amount = sk->prot->wspace(sk);
 504                         err=verify_area(VERIFY_WRITE,(void *)arg,
 505                                                    sizeof(unsigned long));
 506                         if(err)
 507                                 return err;
 508                         put_fs_long(amount,(unsigned long *)arg);
 509                         return(0);
 510                 }
 511                 default:
 512                         return(-EINVAL);
 513         }
 514 }
 515 
 516 
 517 /*
 518  *      This routine computes a TCP checksum. 
 519  */
 520  
 521 unsigned short tcp_check(struct tcphdr *th, int len,
     /* [previous][next][first][last][top][bottom][index][help] */
 522           unsigned long saddr, unsigned long daddr)
 523 {     
             /*
              * Checksum `len' bytes starting at th, together with a pseudo
              * header made of daddr, saddr and one word that combines the
              * byte-swapped length with IPPROTO_TCP.  A zero saddr is
              * replaced by ip_my_addr().  Returns the low 16 bits of the
              * complemented sum.
              *
              * NOTE(review): the clobber lists below name registers that
              * are also used as operands; newer GCC releases reject such
              * overlap - confirm against the compiler in use.
              */
 524         unsigned long sum;
 525    
 526         if (saddr == 0) saddr = ip_my_addr();
 527         
             /* Pseudo header: sum = daddr + saddr + length/protocol word, carries folded back in. */
 528         __asm__("\t addl %%ecx,%%ebx\n"
 529                 "\t adcl %%edx,%%ebx\n"
 530                 "\t adcl $0, %%ebx\n"
 531                 : "=b"(sum)
 532                 : "0"(daddr), "c"(saddr), "d"((ntohs(len) << 16) + IPPROTO_TCP*256)
 533                 : "cx","bx","dx" );
 534    
             /* Add len/4 whole 32 bit words with carry; th (in %esi) is advanced past them. */
 535         if (len > 3) 
 536         {
 537                 __asm__("\tclc\n"
 538                         "1:\n"
 539                         "\t lodsl\n"
 540                         "\t adcl %%eax, %%ebx\n"
 541                         "\t loop 1b\n"
 542                         "\t adcl $0, %%ebx\n"
 543                         : "=b"(sum) , "=S"(th)
 544                         : "0"(sum), "c"(len/4) ,"1"(th)
 545                         : "ax", "cx", "bx", "si" );
 546         }
 547    
 548         /* Convert from 32 bits to 16 bits. */
             /* The upper half is added into %bx with end-around carry; only %bx is meaningful from here on. */
 549         __asm__("\t movl %%ebx, %%ecx\n"
 550                 "\t shrl $16,%%ecx\n"
 551                 "\t addw %%cx, %%bx\n"
 552                 "\t adcw $0, %%bx\n"
 553                 : "=b"(sum)
 554                 : "0"(sum)
 555                 : "bx", "cx");
 556    
 557         /* Check for an extra word. */
 558 
             /* A trailing 16 bit word is added at the current th, which is advanced again. */
 559         if ((len & 2) != 0) 
 560         {
 561                 __asm__("\t lodsw\n"
 562                         "\t addw %%ax,%%bx\n"
 563                         "\t adcw $0, %%bx\n"
 564                         : "=b"(sum), "=S"(th)
 565                         : "0"(sum) ,"1"(th)
 566                         : "si", "ax", "bx");
 567         }
 568    
 569         /* Now check for the extra byte. */
             /* The last odd byte is loaded into %al and %ah is cleared before the 16 bit add. */
 570         if ((len & 1) != 0) 
 571         {
 572                 __asm__("\t lodsb\n"
 573                         "\t movb $0,%%ah\n"
 574                         "\t addw %%ax,%%bx\n"
 575                         "\t adcw $0, %%bx\n"
 576                         : "=b"(sum)
 577                         : "0"(sum) ,"S"(th)
 578                         : "si", "ax", "bx");
 579         }
 580    
 581         /* We only want the bottom 16 bits, but we never cleared the top 16. */
 582   
 583         return((~sum) & 0xffff);
 584 }
 585 
 586 
 587 void tcp_send_check(struct tcphdr *th, unsigned long saddr, 
     /* [previous][next][first][last][top][bottom][index][help] */
 588                 unsigned long daddr, int len, struct sock *sk)
 589 {
 590         th->check = 0;
 591         th->check = tcp_check(th, len, saddr, daddr);
 592         return;
 593 }
 594 
 595 static void tcp_send_skb(struct sock *sk, struct sk_buff *skb)
     /* [previous][next][first][last][top][bottom][index][help] */
 596 {
 597         int size;
 598         struct tcphdr * th = skb->h.th;
 599 
 600         /* length of packet (not counting length of pre-tcp headers) */
 601         size = skb->len - ((unsigned char *) th - skb->data);
 602 
 603         /* sanity check it.. */
 604         if (size < sizeof(struct tcphdr) || size > skb->len) 
 605         {
 606                 printk("tcp_send_skb: bad skb (skb = %p, data = %p, th = %p, len = %lu)\n",
 607                         skb, skb->data, th, skb->len);
 608                 kfree_skb(skb, FREE_WRITE);
 609                 return;
 610         }
 611 
 612         /* If we have queued a header size packet.. */
 613         if (size == sizeof(struct tcphdr)) 
 614         {
 615                 /* If its got a syn or fin its notionally included in the size..*/
 616                 if(!th->syn && !th->fin) 
 617                 {
 618                         printk("tcp_send_skb: attempt to queue a bogon.\n");
 619                         kfree_skb(skb,FREE_WRITE);
 620                         return;
 621                 }
 622         }
 623 
 624         tcp_statistics.TcpOutSegs++;  
 625 
 626         skb->h.seq = ntohl(th->seq) + size - 4*th->doff;
 627         if (after(skb->h.seq, sk->window_seq) ||
 628             (sk->retransmits && sk->timeout == TIME_WRITE) ||
 629              sk->packets_out >= sk->cong_window) 
 630         {
 631                 /* checksum will be supplied by tcp_write_xmit.  So
 632                  * we shouldn't need to set it at all.  I'm being paraoid */
 633                 th->check = 0;
 634                 if (skb->next != NULL) 
 635                 {
 636                         printk("tcp_send_partial: next != NULL\n");
 637                         skb_unlink(skb);
 638                 }
 639                 skb_queue_tail(&sk->write_queue, skb);
 640                 if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
 641                     sk->send_head == NULL &&
 642                     sk->ack_backlog == 0)
 643                         reset_timer(sk, TIME_PROBE0, sk->rto);
 644         } 
 645         else 
 646         {
 647                 th->ack_seq = ntohl(sk->acked_seq);
 648                 th->window = ntohs(tcp_select_window(sk));
 649 
 650                 tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
 651 
 652                 sk->sent_seq = sk->write_seq;
 653                 sk->prot->queue_xmit(sk, skb->dev, skb, 0);
 654         }
 655 }
 656 
 657 struct sk_buff * tcp_dequeue_partial(struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 658 {
 659         struct sk_buff * skb;
 660         unsigned long flags;
 661 
 662         save_flags(flags);
 663         cli();
 664         skb = sk->partial;
 665         if (skb) 
 666         {
 667                 sk->partial = NULL;
 668                 del_timer(&sk->partial_timer);
 669         }
 670         restore_flags(flags);
 671         return skb;
 672 }
 673 
 674 static void tcp_send_partial(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 675 {
 676         struct sk_buff *skb;
 677 
 678         if (sk == NULL)
 679                 return;
 680         while ((skb = tcp_dequeue_partial(sk)) != NULL)
 681                 tcp_send_skb(sk, skb);
 682 }
 683 
 684 void tcp_enqueue_partial(struct sk_buff * skb, struct sock * sk)
     /* [previous][next][first][last][top][bottom][index][help] */
 685 {
 686         struct sk_buff * tmp;
 687         unsigned long flags;
 688 
 689         save_flags(flags);
 690         cli();
 691         tmp = sk->partial;
 692         if (tmp)
 693                 del_timer(&sk->partial_timer);
 694         sk->partial = skb;
 695         sk->partial_timer.expires = HZ;
 696         sk->partial_timer.function = (void (*)(unsigned long)) tcp_send_partial;
 697         sk->partial_timer.data = (unsigned long) sk;
 698         add_timer(&sk->partial_timer);
 699         restore_flags(flags);
 700         if (tmp)
 701                 tcp_send_skb(sk, tmp);
 702 }
 703 
 704 
 705 /*
 706  *      This routine sends an ack and also updates the window. 
 707  */
 708  
 709 static void tcp_send_ack(unsigned long sequence, unsigned long ack,
     /* [previous][next][first][last][top][bottom][index][help] */
 710              struct sock *sk,
 711              struct tcphdr *th, unsigned long daddr)
 712 {
 713         struct sk_buff *buff;
 714         struct tcphdr *t1;
 715         struct device *dev = NULL;
 716         int tmp;
 717 
 718         if(sk->zapped)
 719                 return;         /* We have been reset, we may not send again */
 720         /*
 721          * We need to grab some memory, and put together an ack,
 722          * and then put it into the queue to be sent.
 723          */
 724 
 725         buff = sk->prot->wmalloc(sk, MAX_ACK_SIZE, 1, GFP_ATOMIC);
 726         if (buff == NULL) 
 727         {
 728                 /* Force it to send an ack. */
 729                 sk->ack_backlog++;
 730                 if (sk->timeout != TIME_WRITE && tcp_connected(sk->state)) 
 731                 {
 732                         reset_timer(sk, TIME_WRITE, 10);
 733                 }
 734                 return;
 735         }
 736 
 737         buff->len = sizeof(struct tcphdr);
 738         buff->sk = sk;
 739         buff->localroute = sk->localroute;
 740         t1 =(struct tcphdr *) buff->data;
 741 
 742         /* Put in the IP header and routing stuff. */
 743         tmp = sk->prot->build_header(buff, sk->saddr, daddr, &dev,
 744                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
 745         if (tmp < 0) 
 746         {
 747                 buff->free=1;
 748                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
 749                 return;
 750         }
 751         buff->len += tmp;
 752         t1 =(struct tcphdr *)((char *)t1 +tmp);
 753 
 754         /* FIXME: */
 755         memcpy(t1, th, sizeof(*t1)); /* this should probably be removed */
 756 
 757         /*
 758          *      Swap the send and the receive. 
 759          */
 760          
 761         t1->dest = th->source;
 762         t1->source = th->dest;
 763         t1->seq = ntohl(sequence);
 764         t1->ack = 1;
 765         sk->window = tcp_select_window(sk);
 766         t1->window = ntohs(sk->window);
 767         t1->res1 = 0;
 768         t1->res2 = 0;
 769         t1->rst = 0;
 770         t1->urg = 0;
 771         t1->syn = 0;
 772         t1->psh = 0;
 773         t1->fin = 0;
 774         if (ack == sk->acked_seq) 
 775         {
 776                 sk->ack_backlog = 0;
 777                 sk->bytes_rcv = 0;
 778                 sk->ack_timed = 0;
 779                 if (sk->send_head == NULL && skb_peek(&sk->write_queue) == NULL
 780                                   && sk->timeout == TIME_WRITE) 
 781                 {
 782                         if(sk->keepopen)
 783                                 reset_timer(sk,TIME_KEEPOPEN,TCP_TIMEOUT_LEN);
 784                         else
 785                                 delete_timer(sk);
 786                 }
 787         }
 788         t1->ack_seq = ntohl(ack);
 789         t1->doff = sizeof(*t1)/4;
 790         tcp_send_check(t1, sk->saddr, daddr, sizeof(*t1), sk);
 791         if (sk->debug)
 792                  printk("\rtcp_ack: seq %lx ack %lx\n", sequence, ack);
 793         tcp_statistics.TcpOutSegs++;
 794         sk->prot->queue_xmit(sk, dev, buff, 1);
 795 }
 796 
 797 
 798 /* 
 799  *      This routine builds a generic TCP header. 
 800  */
 801  
 802 static int tcp_build_header(struct tcphdr *th, struct sock *sk, int push)
     /* [previous][next][first][last][top][bottom][index][help] */
 803 {
 804 
 805         /* FIXME: want to get rid of this. */
 806         memcpy(th,(void *) &(sk->dummy_th), sizeof(*th));
 807         th->seq = htonl(sk->write_seq);
 808         th->psh =(push == 0) ? 1 : 0;
 809         th->doff = sizeof(*th)/4;
 810         th->ack = 1;
 811         th->fin = 0;
 812         sk->ack_backlog = 0;
 813         sk->bytes_rcv = 0;
 814         sk->ack_timed = 0;
 815         th->ack_seq = htonl(sk->acked_seq);
 816         sk->window = tcp_select_window(sk)/*sk->prot->rspace(sk)*/;
 817         th->window = htons(sk->window);
 818 
 819         return(sizeof(*th));
 820 }
 821 
 822 /*
 823  *      This routine copies from a user buffer into a socket,
 824  *      and starts the transmit system.
 825  */
 826 
 827 static int tcp_write(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
 828           int len, int nonblock, unsigned flags)
 829 {
 830         int copied = 0;
 831         int copy;
 832         int tmp;
 833         struct sk_buff *skb;
 834         struct sk_buff *send_tmp;
 835         unsigned char *buff;
 836         struct proto *prot;
 837         struct device *dev = NULL;
 838 
 839         sk->inuse=1;
 840         prot = sk->prot;
 841         while(len > 0) 
 842         {
 843                 if (sk->err) 
 844                 {                       /* Stop on an error */
 845                         release_sock(sk);
 846                         if (copied) 
 847                                 return(copied);
 848                         tmp = -sk->err;
 849                         sk->err = 0;
 850                         return(tmp);
 851                 }
 852 
 853         /*
 854          *      First thing we do is make sure that we are established. 
 855          */
 856         
 857                 if (sk->shutdown & SEND_SHUTDOWN) 
 858                 {
 859                         release_sock(sk);
 860                         sk->err = EPIPE;
 861                         if (copied) 
 862                                 return(copied);
 863                         sk->err = 0;
 864                         return(-EPIPE);
 865                 }
 866 
 867 
 868         /* 
 869          *      Wait for a connection to finish.
 870          */
 871         
 872                 while(sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT) 
 873                 {
 874                         if (sk->err) 
 875                         {
 876                                 release_sock(sk);
 877                                 if (copied) 
 878                                         return(copied);
 879                                 tmp = -sk->err;
 880                                 sk->err = 0;
 881                                 return(tmp);
 882                         }
 883 
 884                         if (sk->state != TCP_SYN_SENT && sk->state != TCP_SYN_RECV) 
 885                         {
 886                                 release_sock(sk);
 887                                 if (copied) 
 888                                         return(copied);
 889 
 890                                 if (sk->err) 
 891                                 {
 892                                         tmp = -sk->err;
 893                                         sk->err = 0;
 894                                         return(tmp);
 895                                 }
 896 
 897                                 if (sk->keepopen) 
 898                                 {
 899                                         send_sig(SIGPIPE, current, 0);
 900                                 }
 901                                 return(-EPIPE);
 902                         }
 903 
 904                         if (nonblock || copied) 
 905                         {
 906                                 release_sock(sk);
 907                                 if (copied) 
 908                                         return(copied);
 909                                 return(-EAGAIN);
 910                         }
 911 
 912                         release_sock(sk);
 913                         cli();
 914                 
 915                         if (sk->state != TCP_ESTABLISHED &&
 916                                 sk->state != TCP_CLOSE_WAIT && sk->err == 0) 
 917                         {
 918                                 interruptible_sleep_on(sk->sleep);
 919                                 if (current->signal & ~current->blocked) 
 920                                 {
 921                                         sti();
 922                                         if (copied) 
 923                                                 return(copied);
 924                                         return(-ERESTARTSYS);
 925                                 }
 926                         }
 927                         sk->inuse = 1;
 928                         sti();
 929                 }
 930 
 931         /*
 932          * The following code can result in copy <= if sk->mss is ever
 933          * decreased.  It shouldn't be.  sk->mss is min(sk->mtu, sk->max_window).
 934          * sk->mtu is constant once SYN processing is finished.  I.e. we
 935          * had better not get here until we've seen his SYN and at least one
 936          * valid ack.  (The SYN sets sk->mtu and the ack sets sk->max_window.)
 937          * But ESTABLISHED should guarantee that.  sk->max_window is by definition
 938          * non-decreasing.  Note that any ioctl to set user_mss must be done
 939          * before the exchange of SYN's.  If the initial ack from the other
 940          * end has a window of 0, max_window and thus mss will both be 0.
 941          */
 942 
 943         /* 
 944          *      Now we need to check if we have a half built packet. 
 945          */
 946 
 947                 if ((skb = tcp_dequeue_partial(sk)) != NULL) 
 948                 {
 949                         int hdrlen;
 950 
 951                          /* IP header + TCP header */
 952                         hdrlen = ((unsigned long)skb->h.th - (unsigned long)skb->data)
 953                                  + sizeof(struct tcphdr);
 954         
 955                         /* Add more stuff to the end of skb->len */
 956                         if (!(flags & MSG_OOB)) 
 957                         {
 958                                 copy = min(sk->mss - (skb->len - hdrlen), len);
 959                                 /* FIXME: this is really a bug. */
 960                                 if (copy <= 0) 
 961                                 {
 962                                         printk("TCP: **bug**: \"copy\" <= 0!!\n");
 963                                         copy = 0;
 964                                 }
 965           
 966                                 memcpy_fromfs(skb->data + skb->len, from, copy);
 967                                 skb->len += copy;
 968                                 from += copy;
 969                                 copied += copy;
 970                                 len -= copy;
 971                                 sk->write_seq += copy;
 972                         }
 973                         if ((skb->len - hdrlen) >= sk->mss ||
 974                                 (flags & MSG_OOB) || !sk->packets_out)
 975                                 tcp_send_skb(sk, skb);
 976                         else
 977                                 tcp_enqueue_partial(skb, sk);
 978                         continue;
 979                 }
 980 
 981         /*
 982          * We also need to worry about the window.
 983          * If window < 1/2 the maximum window we've seen from this
 984          *   host, don't use it.  This is sender side
 985          *   silly window prevention, as specified in RFC1122.
 986          *   (Note that this is diffferent than earlier versions of
 987          *   SWS prevention, e.g. RFC813.).  What we actually do is 
 988          *   use the whole MSS.  Since the results in the right
 989          *   edge of the packet being outside the window, it will
 990          *   be queued for later rather than sent.
 991          */
 992 
 993                 copy = sk->window_seq - sk->write_seq;
 994                 if (copy <= 0 || copy < (sk->max_window >> 1) || copy > sk->mss)
 995                         copy = sk->mss;
 996                 if (copy > len)
 997                         copy = len;
 998 
 999         /*
1000          *      We should really check the window here also. 
1001          */
1002          
1003                 send_tmp = NULL;
1004                 if (copy < sk->mss && !(flags & MSG_OOB)) 
1005                 {
1006                         /*
1007                          *      We will release the socket incase we sleep here. 
1008                          */
1009                         release_sock(sk);
1010                         /*
1011                          *      NB: following must be mtu, because mss can be increased.
1012                          *      mss is always <= mtu 
1013                          */
1014                         skb = prot->wmalloc(sk, sk->mtu + 128 + prot->max_header, 0, GFP_KERNEL);
1015                         sk->inuse = 1;
1016                         send_tmp = skb;
1017                 } 
1018                 else 
1019                 {
1020                         /*
1021                          *      We will release the socket incase we sleep here. 
1022                          */
1023                         release_sock(sk);
1024                         skb = prot->wmalloc(sk, copy + prot->max_header , 0, GFP_KERNEL);
1025                         sk->inuse = 1;
1026                 }
1027 
1028                 /*
1029                  *      If we didn't get any memory, we need to sleep. 
1030                  */
1031 
1032                 if (skb == NULL) 
1033                 {
1034                         if (nonblock /* || copied */) 
1035                         {
1036                                 release_sock(sk);
1037                                 if (copied) 
1038                                         return(copied);
1039                                 return(-EAGAIN);
1040                         }
1041 
1042                         /*
1043                          *      FIXME: here is another race condition. 
1044                          */
1045 
1046                         tmp = sk->wmem_alloc;
1047                         release_sock(sk);
1048                         cli();
1049                         /*
1050                          *      Again we will try to avoid it. 
1051                          */
1052                         if (tmp <= sk->wmem_alloc &&
1053                                   (sk->state == TCP_ESTABLISHED||sk->state == TCP_CLOSE_WAIT)
1054                                 && sk->err == 0) 
1055                         {
1056                                 interruptible_sleep_on(sk->sleep);
1057                                 if (current->signal & ~current->blocked) 
1058                                 {
1059                                         sti();
1060                                         if (copied) 
1061                                                 return(copied);
1062                                         return(-ERESTARTSYS);
1063                                 }
1064                         }
1065                         sk->inuse = 1;
1066                         sti();
1067                         continue;
1068                 }
1069 
1070                 skb->len = 0;
1071                 skb->sk = sk;
1072                 skb->free = 0;
1073                 skb->localroute = sk->localroute|(flags&MSG_DONTROUTE);
1074         
1075                 buff = skb->data;
1076         
1077                 /*
1078                  * FIXME: we need to optimize this.
1079                  * Perhaps some hints here would be good.
1080                  */
1081                 
1082                 tmp = prot->build_header(skb, sk->saddr, sk->daddr, &dev,
1083                                  IPPROTO_TCP, sk->opt, skb->mem_len,sk->ip_tos,sk->ip_ttl);
1084                 if (tmp < 0 ) 
1085                 {
1086                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1087                         release_sock(sk);
1088                         if (copied) 
1089                                 return(copied);
1090                         return(tmp);
1091                 }
1092                 skb->len += tmp;
1093                 skb->dev = dev;
1094                 buff += tmp;
1095                 skb->h.th =(struct tcphdr *) buff;
1096                 tmp = tcp_build_header((struct tcphdr *)buff, sk, len-copy);
1097                 if (tmp < 0) 
1098                 {
1099                         prot->wfree(sk, skb->mem_addr, skb->mem_len);
1100                         release_sock(sk);
1101                         if (copied) 
1102                                 return(copied);
1103                         return(tmp);
1104                 }
1105 
1106                 if (flags & MSG_OOB) 
1107                 {
1108                         ((struct tcphdr *)buff)->urg = 1;
1109                         ((struct tcphdr *)buff)->urg_ptr = ntohs(copy);
1110                 }
1111                 skb->len += tmp;
1112                 memcpy_fromfs(buff+tmp, from, copy);
1113 
1114                 from += copy;
1115                 copied += copy;
1116                 len -= copy;
1117                 skb->len += copy;
1118                 skb->free = 0;
1119                 sk->write_seq += copy;
1120         
1121                 if (send_tmp != NULL && sk->packets_out) 
1122                 {
1123                         tcp_enqueue_partial(send_tmp, sk);
1124                         continue;
1125                 }
1126                 tcp_send_skb(sk, skb);
1127         }
1128         sk->err = 0;
1129 
1130 /*
1131  *      Nagles rule. Turn Nagle off with TCP_NODELAY for highly
1132  *      interactive fast network servers. It's meant to be on and
1133  *      it really improves the throughput though not the echo time
1134  *      on my slow slip link - Alan
1135  */
1136 
1137 /*
1138  *      Avoid possible race on send_tmp - c/o Johannes Stille 
1139  */
1140  
1141         if(sk->partial && ((!sk->packets_out) 
1142      /* If not nagling we can send on the before case too.. */
1143               || (sk->nonagle && before(sk->write_seq , sk->window_seq))
1144         ))
1145                 tcp_send_partial(sk);
1146 
1147         release_sock(sk);
1148         return(copied);
1149 }
1150 
1151 
1152 static int tcp_sendto(struct sock *sk, unsigned char *from,
     /* [previous][next][first][last][top][bottom][index][help] */
1153            int len, int nonblock, unsigned flags,
1154            struct sockaddr_in *addr, int addr_len)
1155 {
1156         if (flags & ~(MSG_OOB|MSG_DONTROUTE))
1157                 return -EINVAL;
1158         if (addr_len < sizeof(*addr)) 
1159                 return(-EINVAL);
1160         if (addr->sin_family && addr->sin_family != AF_INET) 
1161                 return(-EINVAL);
1162         if (addr->sin_port != sk->dummy_th.dest) 
1163                 return(-EISCONN);
1164         if (addr->sin_addr.s_addr != sk->daddr) 
1165                 return(-EISCONN);
1166         return(tcp_write(sk, from, len, nonblock, flags));
1167 }
1168 
1169 
1170 static void tcp_read_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1171 {
1172         int tmp;
1173         struct device *dev = NULL;
1174         struct tcphdr *t1;
1175         struct sk_buff *buff;
1176 
1177         if (!sk->ack_backlog) 
1178                 return;
1179 
1180         /*
1181          * FIXME: we need to put code here to prevent this routine from
1182          * being called.  Being called once in a while is ok, so only check
1183          * if this is the second time in a row.
1184          */
1185 
1186         /*
1187          * We need to grab some memory, and put together an ack,
1188          * and then put it into the queue to be sent.
1189          */
1190 
1191         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
1192         if (buff == NULL) 
1193         {
1194                 /* Try again real soon. */
1195                 reset_timer(sk, TIME_WRITE, 10);
1196                 return;
1197         }
1198 
1199         buff->len = sizeof(struct tcphdr);
1200         buff->sk = sk;
1201         buff->localroute = sk->localroute;
1202         
1203         /*
1204          *      Put in the IP header and routing stuff. 
1205          */
1206 
1207         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
1208                                IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
1209         if (tmp < 0) 
1210         {
1211                 buff->free=1;
1212                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
1213                 return;
1214         }
1215 
1216         buff->len += tmp;
1217         t1 =(struct tcphdr *)(buff->data +tmp);
1218 
1219         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
1220         t1->seq = htonl(sk->sent_seq);
1221         t1->ack = 1;
1222         t1->res1 = 0;
1223         t1->res2 = 0;
1224         t1->rst = 0;
1225         t1->urg = 0;
1226         t1->syn = 0;
1227         t1->psh = 0;
1228         sk->ack_backlog = 0;
1229         sk->bytes_rcv = 0;
1230         sk->window = tcp_select_window(sk);/*sk->prot->rspace(sk);*/
1231         t1->window = ntohs(sk->window);
1232         t1->ack_seq = ntohl(sk->acked_seq);
1233         t1->doff = sizeof(*t1)/4;
1234         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1235         sk->prot->queue_xmit(sk, dev, buff, 1);
1236         tcp_statistics.TcpOutSegs++;
1237 }
1238 
1239 
1240 /*
1241  *      FIXME:
1242  *      This routine frees used buffers.
1243  *      It should consider sending an ACK to let the
1244  *      other end know we now have a bigger window.
1245  */
1246 
1247 static void cleanup_rbuf(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
1248 {
1249         unsigned long flags;
1250         unsigned long left;
1251         struct sk_buff *skb;
1252         unsigned long rspace;
1253 
1254         if(sk->debug)
1255                 printk("cleaning rbuf for sk=%p\n", sk);
1256   
1257         save_flags(flags);
1258         cli();
1259   
1260         left = sk->prot->rspace(sk);
1261  
1262         /*
1263          * We have to loop through all the buffer headers,
1264          * and try to free up all the space we can.
1265          */
1266 
1267         while((skb=skb_peek(&sk->receive_queue)) != NULL) 
1268         {
1269                 if (!skb->used) 
1270                         break;
1271                 skb_unlink(skb);
1272                 skb->sk = sk;
1273                 kfree_skb(skb, FREE_READ);
1274         }
1275 
1276         restore_flags(flags);
1277 
1278         /*
1279          * FIXME:
1280          * At this point we should send an ack if the difference
1281          * in the window, and the amount of space is bigger than
1282          * TCP_WINDOW_DIFF.
1283          */
1284 
1285         if(sk->debug)
1286                 printk("sk->rspace = %lu, was %lu\n", sk->prot->rspace(sk),
1287                                             left);
1288         if ((rspace=sk->prot->rspace(sk)) != left) 
1289         {
1290                 /*
1291                  * This area has caused the most trouble.  The current strategy
1292                  * is to simply do nothing if the other end has room to send at
1293                  * least 3 full packets, because the ack from those will auto-
1294                  * matically update the window.  If the other end doesn't think
1295                  * we have much space left, but we have room for atleast 1 more
1296                  * complete packet than it thinks we do, we will send an ack
1297                  * immediatedly.  Otherwise we will wait up to .5 seconds in case
1298                  * the user reads some more.
1299                  */
1300                 sk->ack_backlog++;
1301         /*
1302          * It's unclear whether to use sk->mtu or sk->mss here.  They differ only
1303          * if the other end is offering a window smaller than the agreed on MSS
1304          * (called sk->mtu here).  In theory there's no connection between send
1305          * and receive, and so no reason to think that they're going to send
1306          * small packets.  For the moment I'm using the hack of reducing the mss
1307          * only on the send side, so I'm putting mtu here.
1308          */
1309 
1310                 if (rspace > (sk->window - sk->bytes_rcv + sk->mtu)) 
1311                 {
1312                         /* Send an ack right now. */
1313                         tcp_read_wakeup(sk);
1314                 } 
1315                 else 
1316                 {
1317                         /* Force it to send an ack soon. */
1318                         int was_active = del_timer(&sk->timer);
1319                         if (!was_active || TCP_ACK_TIME < sk->timer.expires) 
1320                         {
1321                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
1322                         } 
1323                         else
1324                                 add_timer(&sk->timer);
1325                 }
1326         }
1327 } 
1328 
1329 
1330 /*
1331  *      Handle reading urgent data. 
1332  */
1333  
1334 static int tcp_read_urg(struct sock * sk, int nonblock,
     /* [previous][next][first][last][top][bottom][index][help] */
1335              unsigned char *to, int len, unsigned flags)
1336 {
1337         struct wait_queue wait = { current, NULL };
1338 
1339         while (len > 0) 
1340         {
1341                 if (sk->urginline || !sk->urg_data || sk->urg_data == URG_READ)
1342                         return -EINVAL;
1343                 if (sk->urg_data & URG_VALID) 
1344                 {
1345                         char c = sk->urg_data;
1346                         if (!(flags & MSG_PEEK))
1347                                 sk->urg_data = URG_READ;
1348                         put_fs_byte(c, to);
1349                         return 1;
1350                 }
1351 
1352                 if (sk->err) 
1353                 {
1354                         int tmp = -sk->err;
1355                         sk->err = 0;
1356                         return tmp;
1357                 }
1358 
1359                 if (sk->state == TCP_CLOSE || sk->done) 
1360                 {
1361                         if (!sk->done) {
1362                                 sk->done = 1;
1363                                 return 0;
1364                         }
1365                         return -ENOTCONN;
1366                 }
1367 
1368                 if (sk->shutdown & RCV_SHUTDOWN) 
1369                 {
1370                         sk->done = 1;
1371                         return 0;
1372                 }
1373 
1374                 if (nonblock)
1375                         return -EAGAIN;
1376 
1377                 if (current->signal & ~current->blocked)
1378                         return -ERESTARTSYS;
1379 
1380                 current->state = TASK_INTERRUPTIBLE;
1381                 add_wait_queue(sk->sleep, &wait);
1382                 if ((sk->urg_data & URG_NOTYET) && sk->err == 0 &&
1383                     !(sk->shutdown & RCV_SHUTDOWN))
1384                         schedule();
1385                 remove_wait_queue(sk->sleep, &wait);
1386                 current->state = TASK_RUNNING;
1387         }
1388         return 0;
1389 }
1390 
1391 
1392 /*
1393  *      This routine copies from a sock struct into the user buffer. 
      *
      *      sk       - socket to read from
      *      to       - user-space destination buffer
      *      len      - maximum number of bytes to copy
      *      nonblock - when set, return -EAGAIN instead of sleeping
      *      flags    - MSG_OOB hands the whole request to tcp_read_urg();
      *                 MSG_PEEK copies data without advancing
      *                 sk->copied_seq and without marking buffers used
      *
      *      Returns the number of bytes copied, or, when nothing was
      *      copied, 0 at end of stream or a negative error (-ENOTCONN,
      *      -sk->err, -EAGAIN, -ERESTARTSYS).
      *
      *      The read position is kept as "*seq"; the next byte wanted has
      *      sequence number 1 + *seq.
1394  */
1395  
1396 static int tcp_read(struct sock *sk, unsigned char *to,
1397         int len, int nonblock, unsigned flags)
1398 {
1399         struct wait_queue wait = { current, NULL };
1400         int copied = 0;
1401         unsigned long peek_seq;
1402         unsigned long *seq;
1403         unsigned long used;
1404 
1405         /* This error should be checked. */
1406         if (sk->state == TCP_LISTEN)
1407                 return -ENOTCONN;
1408 
1409         /* Urgent data needs to be handled specially. */
1410         if (flags & MSG_OOB)
1411                 return tcp_read_urg(sk, nonblock, to, len, flags);
1412 
             /*
              * A normal read advances sk->copied_seq directly.  A peeking
              * read advances a private copy instead, so the socket's read
              * position is left alone.
              */
1413         peek_seq = sk->copied_seq;
1414         seq = &sk->copied_seq;
1415         if (flags & MSG_PEEK)
1416                 seq = &peek_seq;
1417 
1418         add_wait_queue(sk->sleep, &wait);
1419         sk->inuse = 1;
1420         while (len > 0) 
1421         {
1422                 struct sk_buff * skb;
1423                 unsigned long offset;
1424         
1425                 /*
1426                  * are we at urgent data? Stop if we have read anything.
1427                  */
1428                 if (copied && sk->urg_data && sk->urg_seq == 1+*seq)
1429                         break;
1430 
                     /*
                      * Set the sleeping state before looking at the queue;
                      * we are already on the wait queue.  Presumably this is
                      * so a wakeup arriving between the scan and schedule()
                      * is not lost -- confirm against the scheduler.
                      */
1431                 current->state = TASK_INTERRUPTIBLE;
1432 
                     /*
                      * Walk the receive queue looking for the buffer that
                      * holds sequence number 1 + *seq.
                      */
1433                 skb = skb_peek(&sk->receive_queue);
1434                 do 
1435                 {
1436                         if (!skb)
1437                                 break;
                             /* This buffer starts beyond the byte we want: stop scanning. */
1438                         if (before(1+*seq, skb->h.th->seq))
1439                                 break;
                             /*
                              * Position of the wanted byte within this buffer;
                              * one less when the segment has SYN set.
                              */
1440                         offset = 1 + *seq - skb->h.th->seq;
1441                         if (skb->h.th->syn)
1442                                 offset--;
1443                         if (offset < skb->len)
1444                                 goto found_ok_skb;
                             /*
                              * Everything in this buffer is already behind the
                              * read position; a real (non-peek) read marks it
                              * used so cleanup_rbuf() can free it.
                              */
1445                         if (!(flags & MSG_PEEK))
1446                                 skb->used = 1;
1447                         skb = skb->next;
1448                 }
1449                 while (skb != (struct sk_buff *)&sk->receive_queue);
1450 
                     /* No data available right now.  Return what we have, if anything. */
1451                 if (copied)
1452                         break;
1453 
1454                 if (sk->err) 
1455                 {
1456                         copied = -sk->err;
1457                         sk->err = 0;
1458                         break;
1459                 }
1460 
                     /*
                      * Closed socket: the first read returns 0 and sets
                      * sk->done, later reads return -ENOTCONN.
                      */
1461                 if (sk->state == TCP_CLOSE) 
1462                 {
1463                         if (!sk->done) 
1464                         {
1465                                 sk->done = 1;
1466                                 break;
1467                         }
1468                         copied = -ENOTCONN;
1469                         break;
1470                 }
1471 
1472                 if (sk->shutdown & RCV_SHUTDOWN) 
1473                 {
1474                         sk->done = 1;
1475                         break;
1476                 }
1477                         
1478                 if (nonblock) 
1479                 {
1480                         copied = -EAGAIN;
1481                         break;
1482                 }
1483 
                     /*
                      * Free consumed buffers and release the socket before
                      * sleeping, then mark it in use again.
                      */
1484                 cleanup_rbuf(sk);
1485                 release_sock(sk);
1486                 schedule();
1487                 sk->inuse = 1;
1488 
1489                 if (current->signal & ~current->blocked) 
1490                 {
1491                         copied = -ERESTARTSYS;
1492                         break;
1493                 }
1494                 continue;
1495 
1496         found_ok_skb:
1497                 /* Ok so how much can we use ? */
1498                 used = skb->len - offset;
1499                 if (len < used)
1500                         used = len;
1501                 /* do we have urgent data here? */
1502                 if (sk->urg_data) 
1503                 {
                             /*
                              * Distance from the read position to the urgent
                              * byte.  The subtraction is unsigned, so an urgent
                              * byte behind the read position gives a huge value
                              * and fails the test below.
                              */
1504                         unsigned long urg_offset = sk->urg_seq - (1 + *seq);
1505                         if (urg_offset < used) 
1506                         {
1507                                 if (!urg_offset) 
1508                                 {
                                             /*
                                              * The urgent byte is next.  Unless it
                                              * is delivered inline, step over it.
                                              */
1509                                         if (!sk->urginline) 
1510                                         {
1511                                                 ++*seq;
1512                                                 offset++;
1513                                                 used--;
1514                                         }
1515                                 }
1516                                 else
                                             /* Stop this copy just short of the urgent byte. */
1517                                         used = urg_offset;
1518                         }
1519                 }
1520                 /* Copy it */
                     /* Payload starts doff*4 bytes past the start of the TCP header. */
1521                 memcpy_tofs(to,((unsigned char *)skb->h.th) +
1522                         skb->h.th->doff*4 + offset, used);
1523                 copied += used;
1524                 len -= used;
1525                 to += used;
1526                 *seq += used;
                     /* Once the socket's read position is past urg_seq, clear urg_data. */
1527                 if (after(sk->copied_seq+1,sk->urg_seq))
1528                         sk->urg_data = 0;
                     /* Buffer fully consumed by a real read: mark it for cleanup_rbuf(). */
1529                 if (!(flags & MSG_PEEK) && (used + offset >= skb->len))
1530                         skb->used = 1;
1531         }
1532         remove_wait_queue(sk->sleep, &wait);
1533         current->state = TASK_RUNNING;
1534 
1535         /* Clean up data we have read: This will do ACK frames */
1536         cleanup_rbuf(sk);
1537         release_sock(sk);
1538         return copied;
1539 }
1540 
1541  
1542 /*
1543  *      Shutdown the sending side of a connection.
      *
      *      sk:  socket whose sending side is being shut down.
      *      how: shutdown flags; nothing is done unless SEND_SHUTDOWN is set.
      *
      *      Marks the socket SEND_SHUTDOWN, pushes out any partial packet,
      *      builds a FIN segment and either transmits it immediately (write
      *      queue empty) or appends it behind the data still queued.
      *      Afterwards the state moves ESTABLISHED -> FIN_WAIT1,
      *      CLOSE_WAIT -> LAST_ACK, and any other state -> FIN_WAIT2.
      *      Returns nothing; failures are silent.
1544  */
1545 
1546 void tcp_shutdown(struct sock *sk, int how)
     /* [previous][next][first][last][top][bottom][index][help] */
1547 {
1548         struct sk_buff *buff;
1549         struct tcphdr *t1, *th;
1550         struct proto *prot;
1551         int tmp;
1552         struct device *dev = NULL;
1553 
1554         /*
1555          * We need to grab some memory, and put together a FIN,
1556          * and then put it into the queue to be sent.
1557          * FIXME:
1558          *
1559          *      Tim MacKenzie(tym@dibbler.cs.monash.edu.au) 4 Dec '92.
1560          *      Most of this is guesswork, so maybe it will work...
1561          */
1562 
1563         if (!(how & SEND_SHUTDOWN)) 
1564                 return;         /* only the send direction is handled here */
1565          
1566         /*
1567          *      If we've already sent a FIN, return. 
1568          */
1569          
1570         if (sk->state == TCP_FIN_WAIT1 ||
1571             sk->state == TCP_FIN_WAIT2 ||
1572             sk->state == TCP_CLOSING ||
1573             sk->state == TCP_LAST_ACK ||
1574             sk->state == TCP_TIME_WAIT
1575         ) 
1576         {
1577                 return;
1578         }
1579         sk->inuse = 1;          /* presumably the socket busy/lock flag; paired with release_sock() below */
1580 
1581         /*
1582          * Flag that the sender has shut down.
1583          */
1584 
1585         sk->shutdown |= SEND_SHUTDOWN;
1586 
1587         /*
1588          *  Clear out any half completed packets. 
1589          */
1590 
1591         if (sk->partial)
1592                 tcp_send_partial(sk);
1593 
1594         prot =(struct proto *)sk->prot;
1595         th =(struct tcphdr *)&sk->dummy_th;     /* template header copied into the FIN below */
1596         release_sock(sk); /* in case the malloc sleeps. */
1597         buff = prot->wmalloc(sk, MAX_RESET_SIZE,1 , GFP_KERNEL);
1598         if (buff == NULL)
1599                 return;         /* NOTE(review): SEND_SHUTDOWN is already set, but no FIN is queued and the state is unchanged */
1600         sk->inuse = 1;          /* re-mark the socket after the release above */
1601 
1602         buff->sk = sk;
1603         buff->len = sizeof(*t1);
1604         buff->localroute = sk->localroute;
1605         t1 =(struct tcphdr *) buff->data;
1606 
1607         /*
1608          *      Put in the IP header and routing stuff. 
1609          */
1610 
1611         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
1612                            IPPROTO_TCP, sk->opt,
1613                            sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
1614         if (tmp < 0) 
1615         {
1616                 /*
1617                  *      Finish anyway, treat this as a send that got lost. 
1618                  *
1619                  *      Enter FIN_WAIT1 on normal shutdown, which waits for
1620                  *      written data to be completely acknowledged along
1621                  *      with an acknowledge to our FIN.
1622                  *
1623                  *      Enter FIN_WAIT2 on abnormal shutdown -- close before
1624                  *      connection established.
                  *
                  *      The buffer is handed back with wfree(); no FIN is
                  *      transmitted and write_seq is not advanced on this path.
1625                  */
1626                 buff->free=1;
1627                 prot->wfree(sk,buff->mem_addr, buff->mem_len);
1628 
1629                 if (sk->state == TCP_ESTABLISHED)
1630                         sk->state = TCP_FIN_WAIT1;
1631                 else if(sk->state == TCP_CLOSE_WAIT)
1632                         sk->state = TCP_LAST_ACK;
1633                 else
1634                         sk->state = TCP_FIN_WAIT2;
1635 
1636                 release_sock(sk);
1637                 return;
1638         }
1639 
1640         t1 =(struct tcphdr *)((char *)t1 +tmp);        /* TCP header sits tmp bytes into the buffer, after what build_header wrote */
1641         buff->len += tmp;
1642         buff->dev = dev;
1643         memcpy(t1, th, sizeof(*t1));
1644         t1->seq = ntohl(sk->write_seq);
1645         sk->write_seq++;                /* the FIN occupies one sequence number */
1646         buff->h.seq = sk->write_seq;    /* sequence number just past this segment */
1647         t1->ack = 1;
1648         t1->ack_seq = ntohl(sk->acked_seq);
1649         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
1650         t1->fin = 1;
1651         t1->rst = 0;
1652         t1->doff = sizeof(*t1)/4;       /* header length in 32-bit words, no options */
1653         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
1654 
1655         /*
1656          * Can't just queue this up.
1657          * It should go at the end of the write queue.
          * If data is still waiting on the write queue the FIN is
          * appended behind it; otherwise it is transmitted right away.
1658          */
1659         
1660         if (skb_peek(&sk->write_queue) != NULL) 
1661         {
1662                 buff->free=0;
1663                 if (buff->next != NULL) 
1664                 {
1665                         printk("tcp_shutdown: next != NULL\n");
1666                         skb_unlink(buff);
1667                 }
1668                 skb_queue_tail(&sk->write_queue, buff);
1669         } 
1670         else 
1671         {
1672                 sk->sent_seq = sk->write_seq;
1673                 sk->prot->queue_xmit(sk, dev, buff, 0);
1674         }
1675 
         /* Same state transitions as on the build_header failure path above. */
1676         if (sk->state == TCP_ESTABLISHED) 
1677                 sk->state = TCP_FIN_WAIT1;
1678         else if (sk->state == TCP_CLOSE_WAIT)
1679                 sk->state = TCP_LAST_ACK;
1680         else
1681                 sk->state = TCP_FIN_WAIT2;
1682 
1683         release_sock(sk);
1684 }
1685 
1686 
1687 static int
1688 tcp_recvfrom(struct sock *sk, unsigned char *to,
     /* [previous][next][first][last][top][bottom][index][help] */
1689              int to_len, int nonblock, unsigned flags,
1690              struct sockaddr_in *addr, int *addr_len)
1691 {
1692         int result;
1693   
1694         /* 
1695          *      Have to check these first unlike the old code. If 
1696          *      we check them after we lose data on an error
1697          *      which is wrong 
1698          */
1699 
1700         if(addr_len)
1701                 *addr_len = sizeof(*addr);
1702         result=tcp_read(sk, to, to_len, nonblock, flags);
1703 
1704         if (result < 0) 
1705                 return(result);
1706   
1707         if(addr)
1708         {
1709                 addr->sin_family = AF_INET;
1710                 addr->sin_port = sk->dummy_th.dest;
1711                 addr->sin_addr.s_addr = sk->daddr;
1712         }
1713         return(result);
1714 }
1715 
1716 
1717 /*
1718  *      This routine will send an RST to the other tcp. 
1719  */
1720  
1721 static void tcp_reset(unsigned long saddr, unsigned long daddr, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
1722           struct proto *prot, struct options *opt, struct device *dev, int tos, int ttl)
1723 {
1724         struct sk_buff *buff;
1725         struct tcphdr *t1;
1726         int tmp;
1727         struct device *ndev=NULL;
1728   
1729 /*
1730  * We need to grab some memory, and put together an RST,
1731  * and then put it into the queue to be sent.
1732  */
1733 
1734         buff = prot->wmalloc(NULL, MAX_RESET_SIZE, 1, GFP_ATOMIC);
1735         if (buff == NULL) 
1736                 return;
1737 
1738         buff->len = sizeof(*t1);
1739         buff->sk = NULL;
1740         buff->dev = dev;
1741         buff->localroute = 0;
1742 
1743         t1 =(struct tcphdr *) buff->data;
1744 
1745         /*
1746          *      Put in the IP header and routing stuff. 
1747          */
1748 
1749         tmp = prot->build_header(buff, saddr, daddr, &ndev, IPPROTO_TCP, opt,
1750                            sizeof(struct tcphdr),tos,ttl);
1751         if (tmp < 0) 
1752         {
1753                 buff->free = 1;
1754                 prot->wfree(NULL, buff->mem_addr, buff->mem_len);
1755                 return;
1756         }
1757 
1758         t1 =(struct tcphdr *)((char *)t1 +tmp);
1759         buff->len += tmp;
1760         memcpy(t1, th, sizeof(*t1));
1761 
1762         /*
1763          *      Swap the send and the receive. 
1764          */
1765 
1766         t1->dest = th->source;
1767         t1->source = th->dest;
1768         t1->rst = 1;  
1769         t1->window = 0;
1770   
1771         if(th->ack)
1772         {
1773                 t1->ack = 0;
1774                 t1->seq = th->ack_seq;
1775                 t1->ack_seq = 0;
1776         }
1777         else
1778         {
1779                 t1->ack = 1;
1780                 if(!th->syn)
1781                         t1->ack_seq=htonl(th->seq);
1782                 else
1783                         t1->ack_seq=htonl(th->seq+1);
1784                 t1->seq=0;
1785         }
1786 
1787         t1->syn = 0;
1788         t1->urg = 0;
1789         t1->fin = 0;
1790         t1->psh = 0;
1791         t1->doff = sizeof(*t1)/4;
1792         tcp_send_check(t1, saddr, daddr, sizeof(*t1), NULL);
1793         prot->queue_xmit(NULL, dev, buff, 1);
1794         tcp_statistics.TcpOutSegs++;
1795 }
1796 
1797 
1798 /*
1799  *      Look for tcp options. Parses everything but only knows about MSS.
1800  *      This routine is always called with the packet containing the SYN.
1801  *      However it may also be called with the ack to the SYN.  So you
1802  *      can't assume this is always the SYN.  It's always called after
1803  *      we have set up sk->mtu to our own MTU.
1804  */
1805  
1806 static void tcp_options(struct sock *sk, struct tcphdr *th)
     /* [previous][next][first][last][top][bottom][index][help] */
1807 {
1808         unsigned char *ptr;
1809         int length=(th->doff*4)-sizeof(struct tcphdr);
1810         int mss_seen = 0;
1811     
1812         ptr = (unsigned char *)(th + 1);
1813   
1814         while(length>0)
1815         {
1816                 int opcode=*ptr++;
1817                 int opsize=*ptr++;
1818                 switch(opcode)
1819                 {
1820                         case TCPOPT_EOL:
1821                                 return;
1822                         case TCPOPT_NOP:
1823                                 length-=2;
1824                                 continue;
1825                         
1826                         default:
1827                                 if(opsize<=2)   /* Avoid silly options looping forever */
1828                                         return;
1829                                 switch(opcode)
1830                                 {
1831                                         case TCPOPT_MSS:
1832                                                 if(opsize==4 && th->syn)
1833                                                 {
1834                                                         sk->mtu=min(sk->mtu,ntohs(*(unsigned short *)ptr));
1835                                                         mss_seen = 1;
1836                                                 }
1837                                                 break;
1838                                                 /* Add other options here as people feel the urge to implement stuff like large windows */
1839                                 }
1840                                 ptr+=opsize-2;
1841                                 length-=opsize;
1842                 }
1843         }
1844         if (th->syn) 
1845         {
1846                 if (! mss_seen)
1847                       sk->mtu=min(sk->mtu, 536);  /* default MSS if none sent */
1848         }
1849 #ifdef CONFIG_INET_PCTCP
1850         sk->mss = min(sk->max_window >> 1, sk->mtu);
1851 #else    
1852         sk->mss = min(sk->max_window, sk->mtu);
1853 #endif  
1854 }
1855 
/*
 *      Return the classful network mask for an address.
 *
 *      dst is taken in network byte order and the mask is returned in
 *      network byte order.  Class A and class B addresses get their
 *      class mask; every other address gets the class C mask.
 */
static inline unsigned long default_mask(unsigned long dst)
{
        unsigned long host_addr = ntohl(dst);
        unsigned long net_mask;

        if (IN_CLASSA(host_addr))
                net_mask = IN_CLASSA_NET;
        else if (IN_CLASSB(host_addr))
                net_mask = IN_CLASSB_NET;
        else
                net_mask = IN_CLASSC_NET;

        return htonl(net_mask);
}
1865 
1866 /*
1867  *      This routine handles a connection request.
1868  *      It should make sure we haven't already responded.
1869  *      Because of the way BSD works, we have to send a syn/ack now.
1870  *      This also means it will be harder to close a socket which is
1871  *      listening.
1872  */
1873  
1874 static void tcp_conn_request(struct sock *sk, struct sk_buff *skb,
     /* [previous][next][first][last][top][bottom][index][help] */
1875                  unsigned long daddr, unsigned long saddr,
1876                  struct options *opt, struct device *dev)
1877 {
1878         struct sk_buff *buff;
1879         struct tcphdr *t1;
1880         unsigned char *ptr;
1881         struct sock *newsk;
1882         struct tcphdr *th;
1883         struct device *ndev=NULL;
1884         int tmp;
1885         struct rtable *rt;
1886   
1887         th = skb->h.th;
1888 
1889         /* If the socket is dead, don't accept the connection. */
1890         if (!sk->dead) 
1891         {
1892                 sk->data_ready(sk,0);
1893         }
1894         else 
1895         {
1896                 tcp_reset(daddr, saddr, th, sk->prot, opt, dev, sk->ip_tos,sk->ip_ttl);
1897                 tcp_statistics.TcpAttemptFails++;
1898                 kfree_skb(skb, FREE_READ);
1899                 return;
1900         }
1901 
1902         /*
1903          * Make sure we can accept more.  This will prevent a
1904          * flurry of syns from eating up all our memory.
1905          */
1906 
1907         if (sk->ack_backlog >= sk->max_ack_backlog) 
1908         {
1909                 tcp_statistics.TcpAttemptFails++;
1910                 kfree_skb(skb, FREE_READ);
1911                 return;
1912         }
1913 
1914         /*
1915          * We need to build a new sock struct.
1916          * It is sort of bad to have a socket without an inode attached
1917          * to it, but the wake_up's will just wake up the listening socket,
1918          * and if the listening socket is destroyed before this is taken
1919          * off of the queue, this will take care of it.
1920          */
1921 
1922         newsk = (struct sock *) kmalloc(sizeof(struct sock), GFP_ATOMIC);
1923         if (newsk == NULL) 
1924         {
1925                 /* just ignore the syn.  It will get retransmitted. */
1926                 tcp_statistics.TcpAttemptFails++;
1927                 kfree_skb(skb, FREE_READ);
1928                 return;
1929         }
1930 
1931         memcpy(newsk, sk, sizeof(*newsk));
1932         skb_queue_head_init(&newsk->write_queue);
1933         skb_queue_head_init(&newsk->receive_queue);
1934         newsk->send_head = NULL;
1935         newsk->send_tail = NULL;
1936         skb_queue_head_init(&newsk->back_log);
1937         newsk->rtt = 0;         /*TCP_CONNECT_TIME<<3*/
1938         newsk->rto = TCP_TIMEOUT_INIT;
1939         newsk->mdev = 0;
1940         newsk->max_window = 0;
1941         newsk->cong_window = 1;
1942         newsk->cong_count = 0;
1943         newsk->ssthresh = 0;
1944         newsk->backoff = 0;
1945         newsk->blog = 0;
1946         newsk->intr = 0;
1947         newsk->proc = 0;
1948         newsk->done = 0;
1949         newsk->partial = NULL;
1950         newsk->pair = NULL;
1951         newsk->wmem_alloc = 0;
1952         newsk->rmem_alloc = 0;
1953         newsk->localroute = sk->localroute;
1954 
1955         newsk->max_unacked = MAX_WINDOW - TCP_WINDOW_DIFF;
1956 
1957         newsk->err = 0;
1958         newsk->shutdown = 0;
1959         newsk->ack_backlog = 0;
1960         newsk->acked_seq = skb->h.th->seq+1;
1961         newsk->fin_seq = skb->h.th->seq;
1962         newsk->copied_seq = skb->h.th->seq;
1963         newsk->state = TCP_SYN_RECV;
1964         newsk->timeout = 0;
1965         newsk->write_seq = jiffies * SEQ_TICK - seq_offset;
1966         newsk->window_seq = newsk->write_seq;
1967         newsk->rcv_ack_seq = newsk->write_seq;
1968         newsk->urg_data = 0;
1969         newsk->retransmits = 0;
1970         newsk->destroy = 0;
1971         newsk->timer.data = (unsigned long)newsk;
1972         newsk->timer.function = &net_timer;
1973         newsk->dummy_th.source = skb->h.th->dest;
1974         newsk->dummy_th.dest = skb->h.th->source;
1975         
1976         /*
1977          *      Swap these two, they are from our point of view. 
1978          */
1979          
1980         newsk->daddr = saddr;
1981         newsk->saddr = daddr;
1982 
1983         put_sock(newsk->num,newsk);
1984         newsk->dummy_th.res1 = 0;
1985         newsk->dummy_th.doff = 6;
1986         newsk->dummy_th.fin = 0;
1987         newsk->dummy_th.syn = 0;
1988         newsk->dummy_th.rst = 0;        
1989         newsk->dummy_th.psh = 0;
1990         newsk->dummy_th.ack = 0;
1991         newsk->dummy_th.urg = 0;
1992         newsk->dummy_th.res2 = 0;
1993         newsk->acked_seq = skb->h.th->seq + 1;
1994         newsk->copied_seq = skb->h.th->seq;
1995 
1996         /*
1997          *      Grab the ttl and tos values and use them 
1998          */
1999 
2000         newsk->ip_ttl=sk->ip_ttl;
2001         newsk->ip_tos=skb->ip_hdr->tos;
2002 
2003         /*
2004          *      Use 512 or whatever user asked for 
2005          */
2006 
2007         /*
2008          *      Note use of sk->user_mss, since user has no direct access to newsk 
2009          */
2010 
2011         rt=ip_rt_route(saddr, NULL,NULL);
2012         if (sk->user_mss)
2013                 newsk->mtu = sk->user_mss;
2014         else if(rt!=NULL && (rt->rt_flags&RTF_MTU))
2015                 newsk->mtu = rt->rt_mtu - HEADER_SIZE;
2016         else 
2017         {
2018 #ifdef CONFIG_INET_SNARL        /* Sub Nets ARe Local */
2019                 if ((saddr ^ daddr) & default_mask(saddr))
2020 #else
2021                 if ((saddr ^ daddr) & dev->pa_mask)
2022 #endif
2023                         newsk->mtu = 576 - HEADER_SIZE;
2024                 else
2025                         newsk->mtu = MAX_WINDOW;
2026         }
2027 
2028         /*
2029          *      But not bigger than device MTU 
2030          */
2031 
2032         newsk->mtu = min(newsk->mtu, dev->mtu - HEADER_SIZE);
2033 
2034         /*
2035          *      This will min with what arrived in the packet 
2036          */
2037 
2038         tcp_options(newsk,skb->h.th);
2039 
2040         buff = newsk->prot->wmalloc(newsk, MAX_SYN_SIZE, 1, GFP_ATOMIC);
2041         if (buff == NULL) 
2042         {
2043                 sk->err = -ENOMEM;
2044                 newsk->dead = 1;
2045                 release_sock(newsk);
2046                 kfree_skb(skb, FREE_READ);
2047                 tcp_statistics.TcpAttemptFails++;
2048                 return;
2049         }
2050   
2051         buff->len = sizeof(struct tcphdr)+4;
2052         buff->sk = newsk;
2053         buff->localroute = newsk->localroute;
2054 
2055         t1 =(struct tcphdr *) buff->data;
2056 
2057         /*
2058          *      Put in the IP header and routing stuff. 
2059          */
2060 
2061         tmp = sk->prot->build_header(buff, newsk->saddr, newsk->daddr, &ndev,
2062                                IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
2063 
2064         /*
2065          *      Something went wrong. 
2066          */
2067 
2068         if (tmp < 0) 
2069         {
2070                 sk->err = tmp;
2071                 buff->free=1;
2072                 kfree_skb(buff,FREE_WRITE);
2073                 newsk->dead = 1;
2074                 release_sock(newsk);
2075                 skb->sk = sk;
2076                 kfree_skb(skb, FREE_READ);
2077                 tcp_statistics.TcpAttemptFails++;
2078                 return;
2079         }
2080 
2081         buff->len += tmp;
2082         t1 =(struct tcphdr *)((char *)t1 +tmp);
2083   
2084         memcpy(t1, skb->h.th, sizeof(*t1));
2085         buff->h.seq = newsk->write_seq;
2086         /*
2087          *      Swap the send and the receive. 
2088          */
2089         t1->dest = skb->h.th->source;
2090         t1->source = newsk->dummy_th.source;
2091         t1->seq = ntohl(newsk->write_seq++);
2092         t1->ack = 1;
2093         newsk->window = tcp_select_window(newsk);/*newsk->prot->rspace(newsk);*/
2094         newsk->sent_seq = newsk->write_seq;
2095         t1->window = ntohs(newsk->window);
2096         t1->res1 = 0;
2097         t1->res2 = 0;
2098         t1->rst = 0;
2099         t1->urg = 0;
2100         t1->psh = 0;
2101         t1->syn = 1;
2102         t1->ack_seq = ntohl(skb->h.th->seq+1);
2103         t1->doff = sizeof(*t1)/4+1;
2104         ptr =(unsigned char *)(t1+1);
2105         ptr[0] = 2;
2106         ptr[1] = 4;
2107         ptr[2] = ((newsk->mtu) >> 8) & 0xff;
2108         ptr[3] =(newsk->mtu) & 0xff;
2109 
2110         tcp_send_check(t1, daddr, saddr, sizeof(*t1)+4, newsk);
2111         newsk->prot->queue_xmit(newsk, dev, buff, 0);
2112 
2113         reset_timer(newsk, TIME_WRITE /* -1 ? FIXME ??? */, TCP_TIMEOUT_INIT);
2114         skb->sk = newsk;
2115 
2116         /*
2117          *      Charge the sock_buff to newsk. 
2118          */
2119          
2120         sk->rmem_alloc -= skb->mem_len;
2121         newsk->rmem_alloc += skb->mem_len;
2122         
2123         skb_queue_tail(&sk->receive_queue,skb);
2124         sk->ack_backlog++;
2125         release_sock(newsk);
2126         tcp_statistics.TcpOutSegs++;
2127 }
2128 
2129 
/*
 *      Close a TCP socket.
 *
 *      sk:      socket being closed.
 *      timeout: non-zero forces progress from the closing states:
 *               FIN_WAIT1/FIN_WAIT2/CLOSING call tcp_time_wait(), and
 *               TIME_WAIT/LAST_ACK drop straight to TCP_CLOSE.
 *
 *      Both directions are marked shut down, the receive queue is
 *      flushed, and for sockets in CLOSE_WAIT, ESTABLISHED, SYN_SENT or
 *      SYN_RECV a FIN is built and either sent at once or queued behind
 *      pending data.  If unread data had to be thrown away the RST bit
 *      is set on that segment as well.
 */
2130 static void tcp_close(struct sock *sk, int timeout)
     /* [previous][next][first][last][top][bottom][index][help] */
2131 {
2132         struct sk_buff *buff;
2133         int need_reset = 0;     /* set when unread receive data is discarded */
2134         struct tcphdr *t1, *th;
2135         struct proto *prot;
2136         struct device *dev=NULL;
2137         int tmp;
2138 
2139         /*
2140          * We need to grab some memory, and put together a FIN, 
2141          * and then put it into the queue to be sent.
2142          */
2143         sk->inuse = 1;
2144         sk->keepopen = 1;
2145         sk->shutdown = SHUTDOWN_MASK;
2146 
2147         if (!sk->dead) 
2148                 sk->state_change(sk);
2149 
2150         /*
2151          *      We need to flush the recv. buffs. 
2152          */
2153 
2154         if (skb_peek(&sk->receive_queue) != NULL) 
2155         {
2156                 struct sk_buff *skb;
2157                 if(sk->debug)
2158                         printk("Clean rcv queue\n");
2159                 while((skb=skb_dequeue(&sk->receive_queue))!=NULL)
2160                 {
2161                         /* The +1 is not needed because the FIN takes up sequence space and
2162                            is not read!!! */
                         /* Data ending beyond copied_seq was never read by the user. */
2163                         if(skb->len > 0 && after(skb->h.th->seq + skb->len/* + 1 */ , sk->copied_seq))
2164                                 need_reset = 1;
2165                         kfree_skb(skb, FREE_READ);
2166                 }
2167                 if(sk->debug)
2168                         printk("Cleaned.\n");
2169         }
2170 
2171         /*
2172          *      Get rid of any half-completed packets. 
2173          */
2174          
2175         if (sk->partial) 
2176         {
2177                 tcp_send_partial(sk);
2178         }
2179 
2180         switch(sk->state) 
2181         {
2182                 case TCP_FIN_WAIT1:
2183                 case TCP_FIN_WAIT2:
2184                 case TCP_CLOSING:
2185                         /*
2186                          * These states occur when we have already closed out
2187                          * our end.  If there is no timeout, we do not do
2188                          * anything.  We may still be in the middle of sending
2189                          * the remainder of our buffer, for example...
2190                          * resetting the timer would be inappropriate.
2191                          *
2192                          * XXX if retransmit count reaches limit, is tcp_close()
2193                          * called with timeout == 1 ? if not, we need to fix that.
2194                          */
2195 #ifdef NOTDEF
2196                         /* 
2197                          *      Start a timer.
2198                          * original code was 4 * sk->rtt.  In converting to the
2199                          * new rtt representation, we can't quite use that.
2200                          * it seems to make most sense to  use the backed off value
2201                          */
2202                         reset_timer(sk, TIME_CLOSE, 4 * sk->rto);
2203 #endif
2204                         if (timeout) 
2205                                 tcp_time_wait(sk);
2206                         release_sock(sk);
2207                         return; /* break causes a double release - messy */
2208                 case TCP_TIME_WAIT:
2209                 case TCP_LAST_ACK:
2210                         /*
2211                          * A timeout from these states terminates the TCB.
2212                          */
2213                         if (timeout) 
2214                         {
2215                                 sk->state = TCP_CLOSE;
2216                         }
2217                         release_sock(sk);
2218                         return;
2219                 case TCP_LISTEN:
2220                         sk->state = TCP_CLOSE;
2221                         release_sock(sk);
2222                         return;
2223                 case TCP_CLOSE:
2224                         release_sock(sk);
2225                         return;
2226                 case TCP_CLOSE_WAIT:
2227                 case TCP_ESTABLISHED:
2228                 case TCP_SYN_SENT:
2229                 case TCP_SYN_RECV:
2230                         prot =(struct proto *)sk->prot;
2231                         th =(struct tcphdr *)&sk->dummy_th;     /* template header copied into the FIN */
2232                         buff = prot->wmalloc(sk, MAX_FIN_SIZE, 1, GFP_ATOMIC);
2233                         if (buff == NULL) 
2234                         {
2235                                 /* This will force it to try again later. */
2236                                 /* Or it would have if someone released the socket
2237                                    first. Anyway it might work now */
                                 /* NOTE(review): sk is still written after release_sock(),
                                    and SYN_SENT/SYN_RECV are turned into ESTABLISHED here --
                                    confirm both are intended. */
2238                                 release_sock(sk);
2239                                 if (sk->state != TCP_CLOSE_WAIT)
2240                                         sk->state = TCP_ESTABLISHED;
2241                                 reset_timer(sk, TIME_CLOSE, 100);
2242                                 return;
2243                         }
2244                         buff->sk = sk;
2245                         buff->free = 1;
2246                         buff->len = sizeof(*t1);
2247                         buff->localroute = sk->localroute;
2248                         t1 =(struct tcphdr *) buff->data;
2249         
2250                         /*
2251                          *      Put in the IP header and routing stuff. 
2252                          */
2253                         tmp = prot->build_header(buff,sk->saddr, sk->daddr, &dev,
2254                                          IPPROTO_TCP, sk->opt,
2255                                          sizeof(struct tcphdr),sk->ip_tos,sk->ip_ttl);
2256                         if (tmp < 0) 
2257                         {
2258                                 kfree_skb(buff,FREE_WRITE);
2259 
2260                                 /*
2261                                  * Enter FIN_WAIT1 to await completion of
2262                                  * written out data and ACK to our FIN.
                                  *
                                  * NOTE(review): unlike the normal path at the end
                                  * of this function, CLOSE_WAIT goes to FIN_WAIT2
                                  * here rather than LAST_ACK -- confirm.
2263                                  */
2264 
2265                                 if(sk->state==TCP_ESTABLISHED)
2266                                         sk->state=TCP_FIN_WAIT1;
2267                                 else
2268                                         sk->state=TCP_FIN_WAIT2;
2269                                 reset_timer(sk, TIME_CLOSE,4*sk->rto);
2270                                 if(timeout)
2271                                         tcp_time_wait(sk);
2272 
2273                                 release_sock(sk);
2274                                 return;
2275                         }
2276 
2277                         t1 =(struct tcphdr *)((char *)t1 +tmp);        /* TCP header sits tmp bytes into the buffer */
2278                         buff->len += tmp;
2279                         buff->dev = dev;
2280                         memcpy(t1, th, sizeof(*t1));
2281                         t1->seq = ntohl(sk->write_seq);
2282                         sk->write_seq++;                /* the FIN occupies one sequence number */
2283                         buff->h.seq = sk->write_seq;
2284                         t1->ack = 1;
2285         
2286                         /* 
2287                          *      Ack everything immediately from now on. 
2288                          */
2289 
2290                         sk->delay_acks = 0;
2291                         t1->ack_seq = ntohl(sk->acked_seq);
2292                         t1->window = ntohs(sk->window=tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
2293                         t1->fin = 1;
2294                         t1->rst = need_reset;   /* RST is set together with FIN when unread data was dropped above */
2295                         t1->doff = sizeof(*t1)/4;
2296                         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
2297 
2298                         tcp_statistics.TcpOutSegs++;
2299         
                         /* Nothing pending: send the FIN now.  Otherwise restart the
                            write timer and queue the FIN behind the pending data. */
2300                         if (skb_peek(&sk->write_queue) == NULL) 
2301                         {
2302                                 sk->sent_seq = sk->write_seq;
2303                                 prot->queue_xmit(sk, dev, buff, 0);
2304                         } 
2305                         else 
2306                         {
2307                                 reset_timer(sk, TIME_WRITE, sk->rto);
2308                                 if (buff->next != NULL) 
2309                                 {
2310                                         printk("tcp_close: next != NULL\n");
2311                                         skb_unlink(buff);
2312                                 }
2313                                 skb_queue_tail(&sk->write_queue, buff);
2314                         }
2315 
2316                         /*
2317                          * If established (normal close), enter FIN_WAIT1.
2318                          * If in CLOSE_WAIT, enter LAST_ACK
2319                          * If in CLOSING, remain in CLOSING
2320                          * otherwise enter FIN_WAIT2
                          *
                          * (The switch dispatches TCP_CLOSING to the earlier
                          * case, which returns, so the CLOSING test below is
                          * only a safeguard in this branch.)
2321                          */
2322 
2323                         if (sk->state == TCP_ESTABLISHED)
2324                             sk->state = TCP_FIN_WAIT1;
2325                         else if (sk->state == TCP_CLOSE_WAIT)
2326                             sk->state = TCP_LAST_ACK;
2327                         else if (sk->state != TCP_CLOSING)
2328                             sk->state = TCP_FIN_WAIT2;
2329         }
2330         release_sock(sk);
2331 }
2332 
2333 
2334 /*
2335  * This routine takes stuff off of the write queue,
2336  * and puts it in the xmit queue.
2337  */
2338 static void
2339 tcp_write_xmit(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2340 {
2341         struct sk_buff *skb;
2342 
2343         /*
2344          *      The bytes will have to remain here. In time closedown will
2345          *      empty the write queue and all will be happy 
2346          */
2347 
2348         if(sk->zapped)
2349                 return;
2350 
2351         while((skb = skb_peek(&sk->write_queue)) != NULL &&
2352                 before(skb->h.seq, sk->window_seq + 1) &&
2353                 (sk->retransmits == 0 ||
2354                  sk->timeout != TIME_WRITE ||
2355                  before(skb->h.seq, sk->rcv_ack_seq + 1))
2356                 && sk->packets_out < sk->cong_window) 
2357         {
2358                 IS_SKB(skb);
2359                 skb_unlink(skb);
2360                 /* See if we really need to send the packet. */
2361                 if (before(skb->h.seq, sk->rcv_ack_seq +1)) 
2362                 {
2363                         sk->retransmits = 0;
2364                         kfree_skb(skb, FREE_WRITE);
2365                         if (!sk->dead) 
2366                                 sk->write_space(sk);
2367                 } 
2368                 else
2369                 {
2370                         struct tcphdr *th;
2371                         struct iphdr *iph;
2372                         int size;
2373 /*
2374  * put in the ack seq and window at this point rather than earlier,
2375  * in order to keep them monotonic.  We really want to avoid taking
2376  * back window allocations.  That's legal, but RFC1122 says it's frowned on.
2377  * Ack and window will in general have changed since this packet was put
2378  * on the write queue.
2379  */
2380                         iph = (struct iphdr *)(skb->data +
2381                                                skb->dev->hard_header_len);
2382                         th = (struct tcphdr *)(((char *)iph) +(iph->ihl << 2));
2383                         size = skb->len - (((unsigned char *) th) - skb->data);
2384                         
2385                         th->ack_seq = ntohl(sk->acked_seq);
2386                         th->window = ntohs(tcp_select_window(sk));
2387 
2388                         tcp_send_check(th, sk->saddr, sk->daddr, size, sk);
2389 
2390                         sk->sent_seq = skb->h.seq;
2391                         sk->prot->queue_xmit(sk, skb->dev, skb, skb->free);
2392                 }
2393         }
2394 }
2395 
2396 
2397 /*
2398  *      This routine sorts the send list, and resets the
2399  *      sk->send_head and sk->send_tail pointers.
2400  */
2401 
2402 static void sort_send(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
2403 {
2404         struct sk_buff *list = NULL;
2405         struct sk_buff *skb,*skb2,*skb3;
2406 
2407         for (skb = sk->send_head; skb != NULL; skb = skb2) 
2408         {
2409                 skb2 = skb->link3;
2410                 if (list == NULL || before (skb2->h.seq, list->h.seq)) 
2411                 {
2412                         skb->link3 = list;
2413                         sk->send_tail = skb;
2414                         list = skb;
2415                 }
2416                 else
2417                 {
2418                         for (skb3 = list; ; skb3 = skb3->link3) 
2419                         {
2420                                 if (skb3->link3 == NULL ||
2421                                     before(skb->h.seq, skb3->link3->h.seq))
2422                                 {
2423                                         skb->link3 = skb3->link3;
2424                                         skb3->link3 = skb;
2425                                         if (skb->link3 == NULL) 
2426                                                 sk->send_tail = skb;
2427                                         break;
2428                                 }
2429                         }
2430                 }
2431         }
2432         sk->send_head = list;
2433 }
2434   
2435 
2436 /*
2437  *      This routine deals with incoming acks, but not outgoing ones.
      *
      *      It processes the acknowledgement number and window carried in
      *      the TCP header 'th' of a segment received for socket 'sk'.
      *
      *      sk    - socket the segment belongs to
      *      th    - TCP header of the received segment
      *      saddr - not referenced in this function
      *      len   - compared against th->doff*4 to decide whether the
      *              segment carries more than a header
      *
      *      In order, the routine:
      *        - records the largest window seen and rederives sk->mss,
      *        - rejects acks outside the acceptable range,
      *        - copes with a shrunken window by moving packets from the
      *          send list back onto the write queue,
      *        - grows the congestion window,
      *        - drops acked packets from the send list and updates the
      *          round trip estimate,
      *        - sends more queued data or adjusts the timers,
      *        - performs the LAST_ACK / FIN_WAIT1 / CLOSING transitions,
      *        - and finally decides whether to retransmit.
      *
      *      Returns 1 if the socket is zapped.  Returns 0 if the ack is
      *      out of range and is either after sent_seq or the state is
      *      neither ESTABLISHED nor CLOSE_WAIT.  Returns 1 otherwise.
2438  */
2439 
2440 static int tcp_ack(struct sock *sk, struct tcphdr *th, unsigned long saddr, int len)
     /* [previous][next][first][last][top][bottom][index][help] */
2441 {
2442         unsigned long ack;
2443         int flag = 0;
2444 
2445         /* 
2446          * 1 - there was data in packet as well as ack or new data is sent or 
2447          *     in shutdown state
2448          * 2 - data from retransmit queue was acked and removed
2449          * 4 - window shrunk or data from retransmit queue was acked and removed
2450          */
2451 
2452         if(sk->zapped)
2453                 return(1);      /* Dead, can't ack any more so why bother */
2454 
2455         ack = ntohl(th->ack_seq);
             /*
              * Remember the largest window the peer has offered so far and
              * rederive mss from it (half of it under CONFIG_INET_PCTCP),
              * never exceeding sk->mtu.
              */
2456         if (ntohs(th->window) > sk->max_window) 
2457         {
2458                 sk->max_window = ntohs(th->window);
2459 #ifdef CONFIG_INET_PCTCP
2460                 sk->mss = min(sk->max_window>>1, sk->mtu);
2461 #else
2462                 sk->mss = min(sk->max_window, sk->mtu);
2463 #endif  
2464         }
2465 
             /* With the keepopen timer pending, this ack clears the retransmit count. */
2466         if (sk->retransmits && sk->timeout == TIME_KEEPOPEN)
2467                 sk->retransmits = 0;
2468 
2469 /*
2470  *      Not quite clear why the +1 and -1 here, and why not +1 in next line 
2471  */
2472  
             /*
              * Ack outside [rcv_ack_seq-1, sent_seq+1]: do none of the
              * processing below.  It is rejected (0) if it is after
              * sent_seq or the state is neither ESTABLISHED nor
              * CLOSE_WAIT; otherwise the keepopen timer is restarted
              * when keepopen is set and 1 is returned.
              */
2473         if (after(ack, sk->sent_seq+1) || before(ack, sk->rcv_ack_seq-1)) 
2474         {
2475                 if (after(ack, sk->sent_seq) ||
2476                    (sk->state != TCP_ESTABLISHED && sk->state != TCP_CLOSE_WAIT)) 
2477                 {
2478                         return(0);
2479                 }
2480                 if (sk->keepopen) 
2481                 {
2482                         reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2483                 }
2484                 return(1);
2485         }
2486 
             /*
              * len differs from the header length (doff*4): presumably the
              * segment carries data as well - see flag value 1 above.
              */
2487         if (len != th->doff*4) 
2488                 flag |= 1;
2489 
2490         /* See if our window has been shrunk. */
2491 
2492         if (after(sk->window_seq, ack+ntohs(th->window))) 
2493         {
2494                 /*
2495                  * We may need to move packets from the send queue
2496                  * to the write queue, if the window has been shrunk on us.
2497                  * The RFC says you are not allowed to shrink your window
2498                  * like this, but if the other end does, you must be able
2499                  * to deal with it.
2500                  */
2501                 struct sk_buff *skb;
2502                 struct sk_buff *skb2;
2503                 struct sk_buff *wskb = NULL;
2504         
                     /* Detach the whole send list; it is rebuilt in the loop below. */
2505                 skb2 = sk->send_head;
2506                 sk->send_head = NULL;
2507                 sk->send_tail = NULL;
2508         
2509                 flag |= 4;
2510         
2511                 sk->window_seq = ack + ntohs(th->window);
                     /* NOTE(review): cli()/sti() presumably keep interrupts off during the list surgery. */
2512                 cli();
2513                 while (skb2 != NULL) 
2514                 {
2515                         skb = skb2;
2516                         skb2 = skb->link3;
2517                         skb->link3 = NULL;
2518                         if (after(skb->h.seq, sk->window_seq)) 
2519                         {
                                     /* Beyond the new window: no longer counted as outstanding. */
2520                                 if (sk->packets_out > 0) 
2521                                         sk->packets_out--;
2522                                 /* We may need to remove this from the dev send list. */
2523                                 if (skb->next != NULL) 
2524                                 {
2525                                         skb_unlink(skb);                                
2526                                 }
2527                                 /* Now add it to the write_queue. */
                                     /*
                                      * The first moved packet goes to the head of
                                      * write_queue, later ones directly after the
                                      * previously moved one (wskb), so their
                                      * relative order is kept.
                                      */
2528                                 if (wskb == NULL)
2529                                         skb_queue_head(&sk->write_queue,skb);
2530                                 else
2531                                         skb_append(wskb,skb);
2532                                 wskb = skb;
2533                         } 
2534                         else 
2535                         {
                                     /* Still inside the window: append to the rebuilt send list. */
2536                                 if (sk->send_head == NULL) 
2537                                 {
2538                                         sk->send_head = skb;
2539                                         sk->send_tail = skb;
2540                                 }
2541                                 else
2542                                 {
2543                                         sk->send_tail->link3 = skb;
2544                                         sk->send_tail = skb;
2545                                 }
2546                                 skb->link3 = NULL;
2547                         }
2548                 }
2549                 sti();
2550         }
2551 
             /*
              * If either end of the send list is NULL treat the list as
              * empty and make head, tail and packets_out agree.
              */
2552         if (sk->send_tail == NULL || sk->send_head == NULL) 
2553         {
2554                 sk->send_head = NULL;
2555                 sk->send_tail = NULL;
2556                 sk->packets_out= 0;
2557         }
2558 
2559         sk->window_seq = ack + ntohs(th->window);
2560 
2561         /* We don't want too many packets out there. */
2562         if (sk->timeout == TIME_WRITE && 
2563                 sk->cong_window < 2048 && after(ack, sk->rcv_ack_seq)) 
2564         {
2565 /* 
2566  * This is Jacobson's slow start and congestion avoidance. 
2567  * SIGCOMM '88, p. 328.  Because we keep cong_window in integral
2568  * mss's, we can't do cwnd += 1 / cwnd.  Instead, maintain a 
2569  * counter and increment it once every cwnd times.  It's possible
2570  * that this should be done only if sk->retransmits == 0.  I'm
2571  * interpreting "new data is acked" as including data that has
2572  * been retransmitted but is just now being acked.
2573  */
2574                 if (sk->cong_window < sk->ssthresh)  
2575                   /* 
2576                    *    In "safe" area, increase
2577                    */
2578                         sk->cong_window++;
2579                 else 
2580                 {
2581                   /*
2582                    *    In dangerous area, increase slowly.  In theory this is
2583                    *    sk->cong_window += 1 / sk->cong_window
2584                    */
2585                         if (sk->cong_count >= sk->cong_window) 
2586                         {
2587                                 sk->cong_window++;
2588                                 sk->cong_count = 0;
2589                         }
2590                         else 
2591                                 sk->cong_count++;
2592                 }
2593         }
2594 
2595         sk->rcv_ack_seq = ack;
2596 
2597         /*
2598          * if this ack opens up a zero window, clear backoff.  It was
2599          * being used to time the probes, and is probably far higher than
2600          * it needs to be for normal retransmission.
2601          */
2602 
2603         if (sk->timeout == TIME_PROBE0) 
2604         {
2605                 if (skb_peek(&sk->write_queue) != NULL &&   /* should always be non-null */
2606                     ! before (sk->window_seq, sk->write_queue.next->h.seq)) 
2607                 {
2608                         sk->retransmits = 0;
2609                         sk->backoff = 0;
2610                   /*
2611                    *    Recompute rto from rtt.  this eliminates any backoff.
2612                    */
2613 
2614                         sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2615                         if (sk->rto > 120*HZ)
2616                                 sk->rto = 120*HZ;
2617                         if (sk->rto < 20)       /* Was 1*HZ, then 1 - turns out we must allow about
2618                                                    .2 of a second because of BSD delayed acks - on a 100Mb/sec link
2619                                                    .2 of a second is going to need huge windows (SIGH) */
2620                                 sk->rto = 20;
2621                 }
2622         }
2623 
2624   /* 
2625    *    See if we can take anything off of the retransmit queue.
2626    */
2627    
2628         while(sk->send_head != NULL) 
2629         {
2630                 /* Check for a bug. */
2631                 if (sk->send_head->link3 &&
2632                     after(sk->send_head->h.seq, sk->send_head->link3->h.seq)) 
2633                 {
2634                         printk("INET: tcp.c: *** bug send_list out of order.\n");
2635                         sort_send(sk);
2636                 }
2637 
                     /* The head packet is covered by this ack: account for it and free it. */
2638                 if (before(sk->send_head->h.seq, ack+1)) 
2639                 {
2640                         struct sk_buff *oskb;   
2641                         if (sk->retransmits) 
2642                         {       
2643                                 /*
2644                                  *      We were retransmitting.  don't count this in RTT est 
2645                                  */
2646                                 flag |= 2;
2647 
2648                                 /*
2649                                  * even though we've gotten an ack, we're still
2650                                  * retransmitting as long as we're sending from
2651                                  * the retransmit queue.  Keeping retransmits non-zero
2652                                  * prevents us from getting new data interspersed with
2653                                  * retransmissions.
2654                                  */
2655 
2656                                 if (sk->send_head->link3)
2657                                         sk->retransmits = 1;
2658                                 else
2659                                         sk->retransmits = 0;
2660                         }
2661                         /*
2662                          * Note that we only reset backoff and rto in the
2663                          * rtt recomputation code.  And that doesn't happen
2664                          * if there were retransmissions in effect.  So the
2665                          * first new packet after the retransmissions is
2666                          * sent with the backoff still in effect.  Not until
2667                          * we get an ack from a non-retransmitted packet do
2668                          * we reset the backoff and rto.  This allows us to deal
2669                          * with a situation where the network delay has increased
2670                          * suddenly.  I.e. Karn's algorithm. (SIGCOMM '87, p5.)
2671                          */
2672 
2673                         /*
2674                          *      We have one less packet out there. 
2675                          */
2676                          
2677                         if (sk->packets_out > 0) 
2678                                 sk->packets_out --;
2679                         /* 
2680                          *      Wake up the process, it can probably write more. 
2681                          */
2682                         if (!sk->dead) 
2683                                 sk->write_space(sk);
2684                         oskb = sk->send_head;
2685 
                             /*
                              * flag&2 is set above when retransmitting and
                              * unconditionally after this block, so the round
                              * trip estimate is updated at most once per call,
                              * from the first packet removed, and only when no
                              * retransmission was in effect.
                              */
2686                         if (!(flag&2)) 
2687                         {
2688                                 long m;
2689         
2690                                 /*
2691                                  *      The following amusing code comes from Jacobson's
2692                                  *      article in SIGCOMM '88.  Note that rtt and mdev
2693                                  *      are scaled versions of rtt and mean deviation.
2694                                  *      This is designed to be as fast as possible 
2695                                  *      m stands for "measurement".
2696                                  */
2697         
2698                                 m = jiffies - oskb->when;  /* RTT */
2699                                 if(m<=0)
2700                                         m=1;            /* IS THIS RIGHT FOR <0 ??? */
2701                                 m -= (sk->rtt >> 3);    /* m is now error in rtt est */
2702                                 sk->rtt += m;           /* rtt = 7/8 rtt + 1/8 new */
2703                                 if (m < 0)
2704                                         m = -m;         /* m is now abs(error) */
2705                                 m -= (sk->mdev >> 2);   /* similar update on mdev */
2706                                 sk->mdev += m;          /* mdev = 3/4 mdev + 1/4 new */
2707         
2708                                 /*
2709                                  *      Now update timeout.  Note that this removes any backoff.
2710                                  */
2711                          
2712                                 sk->rto = ((sk->rtt >> 2) + sk->mdev) >> 1;
2713                                 if (sk->rto > 120*HZ)
2714                                         sk->rto = 120*HZ;
2715                                 if (sk->rto < 20)       /* Was 1*HZ - keep .2 as minimum cos of the BSD delayed acks */
2716                                         sk->rto = 20;
2717                                 sk->backoff = 0;
2718                         }
2719                         flag |= (2|4);
                             /* Unlink the head of the send list (re-read after cli()). */
2720                         cli();
2721                         oskb = sk->send_head;
2722                         IS_SKB(oskb);
2723                         sk->send_head = oskb->link3;
2724                         if (sk->send_head == NULL) 
2725                         {
2726                                 sk->send_tail = NULL;
2727                         }
2728 
2729                 /*
2730                  *      We may need to remove this from the dev send list. 
2731                  */
2732 
2733                         if (oskb->next)
2734                                 skb_unlink(oskb);
2735                         sti();
2736                         kfree_skb(oskb, FREE_WRITE); /* write. */
2737                         if (!sk->dead) 
2738                                 sk->write_space(sk);
2739                 }
2740                 else
2741                 {
                             /* Head is not yet acked; stop here. */
2742                         break;
2743                 }
2744         }
2745 
2746         /*
2747          * Maybe we can take some stuff off of the write queue,
2748          * and put it onto the xmit queue.
2749          */
2750         if (skb_peek(&sk->write_queue) != NULL) 
2751         {
                     /* Same three conditions that tcp_write_xmit() tests before sending. */
2752                 if (after (sk->window_seq+1, sk->write_queue.next->h.seq) &&
2753                         (sk->retransmits == 0 || 
2754                          sk->timeout != TIME_WRITE ||
2755                          before(sk->write_queue.next->h.seq, sk->rcv_ack_seq + 1))
2756                         && sk->packets_out < sk->cong_window) 
2757                 {
2758                         flag |= 1;
2759                         tcp_write_xmit(sk);
2760                 }
                     /*
                      * Data is queued but starts beyond the window, nothing is
                      * awaiting an ack and no acks are backlogged: arm the
                      * TIME_PROBE0 timer.
                      */
2761                 else if (before(sk->window_seq, sk->write_queue.next->h.seq) &&
2762                         sk->send_head == NULL &&
2763                         sk->ack_backlog == 0 &&
2764                         sk->state != TCP_TIME_WAIT) 
2765                 {
2766                         reset_timer(sk, TIME_PROBE0, sk->rto);
2767                 }               
2768         }
2769         else
2770         {
2771                 if (sk->send_head == NULL && sk->ack_backlog == 0 &&
2772                 sk->state != TCP_TIME_WAIT && !sk->keepopen) 
2773                 {
2774                         if (!sk->dead)
2775                                 sk->write_space(sk);
                             /*
                              * NOTE(review): the enclosing condition already
                              * requires !sk->keepopen, so the TIME_KEEPOPEN
                              * branch below cannot be taken and the timer is
                              * always deleted here.
                              */
2776                         if (sk->keepopen)
2777                                 reset_timer(sk, TIME_KEEPOPEN, TCP_TIMEOUT_LEN);
2778                         else
2779                                 delete_timer(sk);
2780                 }
2781                 else
2782                 {
                             /*
                              * NOTE(review): this compares the connection state
                              * with the keepopen flag; the intent is not clear
                              * from this code - confirm before changing.
                              */
2783                         if (sk->state != (unsigned char) sk->keepopen) 
2784                         {
2785                                 reset_timer(sk, TIME_WRITE, sk->rto);
2786                         }
                             /* In TIME_WAIT the TIME_CLOSE timer replaces the one set just above. */
2787                         if (sk->state == TCP_TIME_WAIT) 
2788                         {
2789                                 reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
2790                         }       
2791                 }
2792         }
2793 
             /* Nothing outstanding and nothing queued: push out any partial packet. */
2794         if (sk->packets_out == 0 && sk->partial != NULL &&
2795                 skb_peek(&sk->write_queue) == NULL && sk->send_head == NULL) 
2796         {
2797                 flag |= 1;
2798                 tcp_send_partial(sk);
2799         }
2800 
2801         /*
2802          * In the LAST_ACK case, the other end FIN'd us.  We then FIN'd them, and
2803          * we are now waiting for an acknowledge to our FIN.  The other end is
2804          * already in TIME_WAIT.
2805          *
2806          * Move to TCP_CLOSE on success.
2807          */
2808 
2809         if (sk->state == TCP_LAST_ACK) 
2810         {
2811                 if (!sk->dead)
2812                         sk->state_change(sk);
2813                 if (sk->rcv_ack_seq == sk->write_seq && sk->acked_seq == sk->fin_seq) 
2814                 {
2815                         flag |= 1;
2816                         sk->state = TCP_CLOSE;
2817                         sk->shutdown = SHUTDOWN_MASK;
2818                 }
2819         }
2820 
2821         /*
2822          * Incoming ACK to a FIN we sent in the case of our initiating the close.
2823          *
2824          * Move to FIN_WAIT2 to await a FIN from the other end.
2825          */
2826 
2827         if (sk->state == TCP_FIN_WAIT1) 
2828         {
2829                 if (!sk->dead) 
2830                         sk->state_change(sk);
2831                 if (sk->rcv_ack_seq == sk->write_seq) 
2832                 {
2833                         flag |= 1;
                             /*
                              * NOTE(review): TIME_WAIT is entered when acked_seq
                              * differs from fin_seq, FIN_WAIT2 when they are
                              * equal - confirm this is the intended way round.
                              */
2834                         if (sk->acked_seq != sk->fin_seq) 
2835                         {
2836                                 tcp_time_wait(sk);
2837                         }
2838                         else
2839                         {
2840                                 sk->shutdown = SHUTDOWN_MASK;
2841                                 sk->state = TCP_FIN_WAIT2;
2842                         }
2843                 }
2844         }
2845 
2846         /*
2847          *      Incoming ACK to a FIN we sent in the case of a simultaneous close.
2848          *
2849          *      Move to TIME_WAIT
2850          */
2851 
2852         if (sk->state == TCP_CLOSING) 
2853         {
2854                 if (!sk->dead) 
2855                         sk->state_change(sk);
2856                 if (sk->rcv_ack_seq == sk->write_seq) 
2857                 {
2858                         flag |= 1;
2859                         tcp_time_wait(sk);
2860                 }
2861         }
2862 
2863         /*
2864          * I make no guarantees about the first clause in the following
2865          * test, i.e. "(!flag) || (flag&4)".  I'm not entirely sure under
2866          * what conditions "!flag" would be true.  However I think the rest
2867          * of the conditions would prevent that from causing any
2868          * unnecessary retransmission. 
2869          *   Clearly if the first packet has expired it should be 
2870          * retransmitted.  The other alternative, "flag&2 && retransmits", is
2871          * harder to explain:  You have to look carefully at how and when the
2872          * timer is set and with what timeout.  The most recent transmission always
2873          * sets the timer.  So in general if the most recent thing has timed
2874          * out, everything before it has as well.  So we want to go ahead and
2875          * retransmit some more.  If we didn't explicitly test for this
2876          * condition with "flag&2 && retransmits", chances are "when + rto < jiffies"
2877          * would not be true.  If you look at the pattern of timing, you can
2878          * show that rto is increased fast enough that the next packet would
2879          * almost never be retransmitted immediately.  Then you'd end up
2880          * waiting for a timeout to send each packet on the retransmission
2881          * queue.  With my implementation of the Karn sampling algorithm,
2882          * the timeout would double each time.  The net result is that it would
2883          * take a hideous amount of time to recover from a single dropped packet.
2884          * It's possible that there should also be a test for TIME_WRITE, but
2885          * I think as long as "send_head != NULL" and "retransmit" is on, we've
2886          * got to be in real retransmission mode.
2887          *   Note that ip_do_retransmit is called with all==1.  Setting cong_window
2888          * back to 1 at the timeout will cause us to send 1, then 2, etc. packets.
2889          * As long as no further losses occur, this seems reasonable.
2890          */
2891         
2892         if (((!flag) || (flag&4)) && sk->send_head != NULL &&
2893                (((flag&2) && sk->retransmits) ||
2894                (sk->send_head->when + sk->rto < jiffies))) 
2895         {
2896                 ip_do_retransmit(sk, 1);
2897                 reset_timer(sk, TIME_WRITE, sk->rto);
2898         }
2899 
2900         return(1);
2901 }
2902 
2903 
2904 /*
2905  *      This routine handles the data.  If there is room in the buffer,
2906  *      it will have already been moved into it.  If there is no
2907  *      room, then we will just have to discard the packet.
2908  */
2909 
2910 static int tcp_data(struct sk_buff *skb, struct sock *sk, 
     /* [previous][next][first][last][top][bottom][index][help] */
2911          unsigned long saddr, unsigned short len)
2912 {
2913         struct sk_buff *skb1, *skb2;
2914         struct tcphdr *th;
2915         int dup_dumped=0;
2916         unsigned long new_seq;
2917 
2918         th = skb->h.th;
2919         skb->len = len -(th->doff*4);
2920 
2921         /* The bytes in the receive read/assembly queue has increased. Needed for the
2922            low memory discard algorithm */
2923            
2924         sk->bytes_rcv += skb->len;
2925         
2926         if (skb->len == 0 && !th->fin && !th->urg && !th->psh) 
2927         {
2928                 /* 
2929                  *      Don't want to keep passing ack's back and forth. 
2930                  *      (someone sent us dataless, boring frame)
2931                  */
2932                 if (!th->ack)
2933                         tcp_send_ack(sk->sent_seq, sk->acked_seq,sk, th, saddr);
2934                 kfree_skb(skb, FREE_READ);
2935                 return(0);
2936         }
2937         
2938         /*
2939          *      We no longer have anyone receiving data on this connection.
2940          */
2941 
2942         if(sk->shutdown & RCV_SHUTDOWN)
2943         {
2944                 new_seq= th->seq + skb->len + th->syn;  /* Right edge of _data_ part of frame */
2945                 
2946                 if(after(new_seq,sk->copied_seq+1))     /* If the right edge of this frame is after the last copied byte
2947                                                            then it contains data we will never touch. We send an RST to 
2948                                                            ensure the far end knows it never got to the application */
2949                 {
2950                         sk->acked_seq = new_seq + th->fin;
2951                         tcp_reset(sk->saddr, sk->daddr, skb->h.th,
2952                                 sk->prot, NULL, skb->dev, sk->ip_tos, sk->ip_ttl);
2953                         tcp_statistics.TcpEstabResets++;
2954                         sk->state = TCP_CLOSE;
2955                         sk->err = EPIPE;
2956                         sk->shutdown = SHUTDOWN_MASK;
2957                         kfree_skb(skb, FREE_READ);
2958                         if (!sk->dead)
2959                                 sk->state_change(sk);
2960                         return(0);
2961                 }
2962                 /* Discard the frame here - we've already proved its a duplicate */
2963                 
2964                 kfree_skb(skb, FREE_READ);
2965                 return(0);                              
2966         }
2967         /*
2968          *      Now we have to walk the chain, and figure out where this one
2969          *      goes into it.  This is set up so that the last packet we received
2970          *      will be the first one we look at, that way if everything comes
2971          *      in order, there will be no performance loss, and if they come
2972          *      out of order we will be able to fit things in nicely.
2973          */
2974 
2975         /* 
2976          *      This should start at the last one, and then go around forwards.
2977          */
2978 
2979         if (skb_peek(&sk->receive_queue) == NULL)       /* Empty queue is easy case */
2980         {
2981                 skb_queue_head(&sk->receive_queue,skb);
2982                 skb1= NULL;
2983         } 
2984         else
2985         {
2986                 for(skb1=sk->receive_queue.prev; ; skb1 = skb1->prev) 
2987                 {
2988                         if(sk->debug)
2989                         {
2990                                 printk("skb1=%p :", skb1);
2991                                 printk("skb1->h.th->seq = %ld: ", skb1->h.th->seq);
2992                                 printk("skb->h.th->seq = %ld\n",skb->h.th->seq);
2993                                 printk("copied_seq = %ld acked_seq = %ld\n", sk->copied_seq,
2994                                                 sk->acked_seq);
2995                         }
2996                         
2997                         /*
2998                          *      Optimisation: Duplicate frame or extension of previous frame from
2999                          *      same sequence point (lost ack case).
3000                          *      The frame contains duplicate data or replaces a previous frame
3001                          *      discard the previous frame (safe as sk->inuse is set) and put
3002                          *      the new one in its place.
3003                          */
3004                          
3005                         if (th->seq==skb1->h.th->seq && skb->len>= skb1->len)
3006                         {
3007                                 skb_append(skb1,skb);
3008                                 skb_unlink(skb1);
3009                                 kfree_skb(skb1,FREE_READ);
3010                                 dup_dumped=1;
3011                                 skb1=NULL;
3012                                 break;
3013                         }
3014                         
3015                         /*
3016                          *      Found where it fits
3017                          */
3018                          
3019                         if (after(th->seq+1, skb1->h.th->seq))
3020                         {
3021                                 skb_append(skb1,skb);
3022                                 break;
3023                         }
3024                         
3025                         /*
3026                          *      See if we've hit the start. If so insert.
3027                          */
3028                         if (skb1 == skb_peek(&sk->receive_queue))
3029                         {
3030                                 skb_queue_head(&sk->receive_queue, skb);
3031                                 break;
3032                         }
3033                 }
3034         }
3035 
3036         /*
3037          *      Figure out what the ack value for this frame is
3038          */
3039          
3040         th->ack_seq = th->seq + skb->len;
3041         if (th->syn) 
3042                 th->ack_seq++;
3043         if (th->fin)
3044                 th->ack_seq++;
3045 
3046         if (before(sk->acked_seq, sk->copied_seq)) 
3047         {
3048                 printk("*** tcp.c:tcp_data bug acked < copied\n");
3049                 sk->acked_seq = sk->copied_seq;
3050         }
3051 
3052         /*
3053          *      Now figure out if we can ack anything.
3054          */
3055 
3056         if ((!dup_dumped && (skb1 == NULL || skb1->acked)) || before(th->seq, sk->acked_seq+1)) 
3057         {
3058                 if (before(th->seq, sk->acked_seq+1)) 
3059                 {
3060                         int newwindow;
3061 
3062                         if (after(th->ack_seq, sk->acked_seq)) 
3063                         {
3064                                 newwindow = sk->window-(th->ack_seq - sk->acked_seq);
3065                                 if (newwindow < 0)
3066                                         newwindow = 0;  
3067                                 sk->window = newwindow;
3068                                 sk->acked_seq = th->ack_seq;
3069                         }
3070                         skb->acked = 1;
3071 
3072                         /* 
3073                          *      When we ack the fin, we turn on the RCV_SHUTDOWN flag.
3074                          */
3075 
3076                         if (skb->h.th->fin) 
3077                         {
3078                                 if (!sk->dead) 
3079                                         sk->state_change(sk);
3080                                 sk->shutdown |= RCV_SHUTDOWN;
3081                         }
3082           
3083                         for(skb2 = skb->next;
3084                             skb2 != (struct sk_buff *)&sk->receive_queue;
3085                             skb2 = skb2->next) 
3086                         {
3087                                 if (before(skb2->h.th->seq, sk->acked_seq+1)) 
3088                                 {
3089                                         if (after(skb2->h.th->ack_seq, sk->acked_seq))
3090                                         {
3091                                                 newwindow = sk->window -
3092                                                  (skb2->h.th->ack_seq - sk->acked_seq);
3093                                                 if (newwindow < 0)
3094                                                         newwindow = 0;  
3095                                                 sk->window = newwindow;
3096                                                 sk->acked_seq = skb2->h.th->ack_seq;
3097                                         }
3098                                         skb2->acked = 1;
3099                                         /*
3100                                          *      When we ack the fin, we turn on
3101                                          *      the RCV_SHUTDOWN flag.
3102                                          */
3103                                         if (skb2->h.th->fin) 
3104                                         {
3105                                                 sk->shutdown |= RCV_SHUTDOWN;
3106                                                 if (!sk->dead)
3107                                                         sk->state_change(sk);
3108                                         }
3109 
3110                                         /*
3111                                          *      Force an immediate ack.
3112                                          */
3113                                          
3114                                         sk->ack_backlog = sk->max_ack_backlog;
3115                                 }
3116                                 else
3117                                 {
3118                                         break;
3119                                 }
3120                         }
3121 
3122                         /*
3123                          *      This also takes care of updating the window.
3124                          *      This if statement needs to be simplified.
3125                          */
3126                         if (!sk->delay_acks ||
3127                             sk->ack_backlog >= sk->max_ack_backlog || 
3128                             sk->bytes_rcv > sk->max_unacked || th->fin) {
3129         /*                      tcp_send_ack(sk->sent_seq, sk->acked_seq,sk,th, saddr); */
3130                         }
3131                         else 
3132                         {
3133                                 sk->ack_backlog++;
3134                                 if(sk->debug)
3135                                         printk("Ack queued.\n");
3136                                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3137                         }
3138                 }
3139         }
3140 
3141         /*
3142          *      If we've missed a packet, send an ack.
3143          *      Also start a timer to send another.
3144          */
3145          
3146         if (!skb->acked) 
3147         {
3148         
3149         /*
3150          *      This is important.  If we don't have much room left,
3151          *      we need to throw out a few packets so we have a good
3152          *      window.  Note that mtu is used, not mss, because mss is really
3153          *      for the send side.  He could be sending us stuff as large as mtu.
3154          */
3155                  
3156                 while (sk->prot->rspace(sk) < sk->mtu) 
3157                 {
3158                         skb1 = skb_peek(&sk->receive_queue);
3159                         if (skb1 == NULL) 
3160                         {
3161                                 printk("INET: tcp.c:tcp_data memory leak detected.\n");
3162                                 break;
3163                         }
3164 
3165                         /*
3166                          *      Don't throw out something that has been acked. 
3167                          */
3168                  
3169                         if (skb1->acked) 
3170                         {
3171                                 break;
3172                         }
3173                 
3174                         skb_unlink(skb1);
3175                         kfree_skb(skb1, FREE_READ);
3176                 }
3177                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3178                 sk->ack_backlog++;
3179                 reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3180         }
3181         else
3182         {
3183                 /* We missed a packet.  Send an ack to try to resync things. */
3184                 tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3185         }
3186 
3187         /*
3188          *      Now tell the user we may have some data. 
3189          */
3190          
3191         if (!sk->dead) 
3192         {
3193                 if(sk->debug)
3194                         printk("Data wakeup.\n");
3195                 sk->data_ready(sk,0);
3196         } 
3197         return(0);
3198 }
3199 
3200 
3201 static void tcp_check_urg(struct sock * sk, struct tcphdr * th)
     /* [previous][next][first][last][top][bottom][index][help] */
3202 {
3203         unsigned long ptr = ntohs(th->urg_ptr);
3204 
3205         if (ptr)
3206                 ptr--;
3207         ptr += th->seq;
3208 
3209         /* ignore urgent data that we've already seen and read */
3210         if (after(sk->copied_seq+1, ptr))
3211                 return;
3212 
3213         /* do we already have a newer (or duplicate) urgent pointer? */
3214         if (sk->urg_data && !after(ptr, sk->urg_seq))
3215                 return;
3216 
3217         /* tell the world about our new urgent pointer */
3218         if (sk->proc != 0) {
3219                 if (sk->proc > 0) {
3220                         kill_proc(sk->proc, SIGURG, 1);
3221                 } else {
3222                         kill_pg(-sk->proc, SIGURG, 1);
3223                 }
3224         }
3225         sk->urg_data = URG_NOTYET;
3226         sk->urg_seq = ptr;
3227 }
3228 
3229 static inline int tcp_urg(struct sock *sk, struct tcphdr *th,
     /* [previous][next][first][last][top][bottom][index][help] */
3230         unsigned long saddr, unsigned long len)
3231 {
3232         unsigned long ptr;
3233 
3234         /* check if we get a new urgent pointer */
3235         if (th->urg)
3236                 tcp_check_urg(sk,th);
3237 
3238         /* do we wait for any urgent data? */
3239         if (sk->urg_data != URG_NOTYET)
3240                 return 0;
3241 
3242         /* is the urgent pointer pointing into this packet? */
3243         ptr = sk->urg_seq - th->seq + th->doff*4;
3244         if (ptr >= len)
3245                 return 0;
3246 
3247         /* ok, got the correct packet, update info */
3248         sk->urg_data = URG_VALID | *(ptr + (unsigned char *) th);
3249         if (!sk->dead)
3250                 sk->data_ready(sk,0);
3251         return 0;
3252 }
3253 
3254 
3255 /*
3256  *  This deals with incoming fins. 'Linus at 9 O'clock' 8-) 
3257  *
3258  *  If we are ESTABLISHED, a received fin moves us to CLOSE-WAIT
3259  *  (and thence onto LAST-ACK and finally, CLOSED, we never enter
3260  *  TIME-WAIT)
3261  *
3262  *  If we are in FINWAIT-1, a received FIN indicates simultanious
3263  *  close and we go into CLOSING (and later onto TIME-WAIT)
3264  *
3265  *  If we are in FINWAIT-2, a received FIN moves us to TIME-WAIT.
3266  *
3267  */
3268  
3269 static int tcp_fin(struct sk_buff *skb, struct sock *sk, struct tcphdr *th, 
     /* [previous][next][first][last][top][bottom][index][help] */
3270          unsigned long saddr, struct device *dev)
3271 {
3272         sk->fin_seq = th->seq + skb->len + th->syn + th->fin;
3273 
3274         if (!sk->dead) 
3275         {
3276                 sk->state_change(sk);
3277         }
3278 
3279         switch(sk->state) 
3280         {
3281                 case TCP_SYN_RECV:
3282                 case TCP_SYN_SENT:
3283                 case TCP_ESTABLISHED:
3284                         /*
3285                          * move to CLOSE_WAIT, tcp_data() already handled
3286                          * sending the ack.
3287                          */
3288                         reset_timer(sk, TIME_CLOSE, TCP_TIMEOUT_LEN);
3289                         /*sk->fin_seq = th->seq+1;*/
3290                         tcp_statistics.TcpCurrEstab--;
3291                         sk->state = TCP_CLOSE_WAIT;
3292                         if (th->rst)
3293                                 sk->shutdown = SHUTDOWN_MASK;
3294                         break;
3295 
3296                 case TCP_CLOSE_WAIT:
3297                 case TCP_CLOSING:
3298                         /*
3299                          * received a retransmission of the FIN, do
3300                          * nothing.
3301                          */
3302                         break;
3303                 case TCP_TIME_WAIT:
3304                         /*
3305                          * received a retransmission of the FIN,
3306                          * restart the TIME_WAIT timer.
3307                          */
3308                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3309                         return(0);
3310                 case TCP_FIN_WAIT1:
3311                         /*
3312                          * This case occurs when a simultanious close
3313                          * happens, we must ack the received FIN and
3314                          * enter the CLOSING state.
3315                          *
3316                          * XXX timeout not set properly
3317                          */
3318 
3319                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3320                         /*sk->fin_seq = th->seq+1;*/
3321                         sk->state = TCP_CLOSING;
3322                         break;
3323                 case TCP_FIN_WAIT2:
3324                         /*
3325                          * received a FIN -- send ACK and enter TIME_WAIT
3326                          */
3327                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3328                         /*sk->fin_seq = th->seq+1;*/
3329                         sk->state = TCP_TIME_WAIT;
3330                         break;
3331                 case TCP_CLOSE:
3332                         /*
3333                          * already in CLOSE
3334                          */
3335                         break;
3336                 default:
3337                         sk->state = TCP_LAST_ACK;
3338         
3339                         /* Start the timers. */
3340                         reset_timer(sk, TIME_CLOSE, TCP_TIMEWAIT_LEN);
3341                         return(0);
3342         }
3343         sk->ack_backlog++;
3344 
3345         return(0);
3346 }
3347 
3348 
3349 /* This will accept the next outstanding connection. */
3350 static struct sock *
3351 tcp_accept(struct sock *sk, int flags)
     /* [previous][next][first][last][top][bottom][index][help] */
3352 {
3353         struct sock *newsk;
3354         struct sk_buff *skb;
3355   
3356   /*
3357    * We need to make sure that this socket is listening,
3358    * and that it has something pending.
3359    */
3360 
3361         if (sk->state != TCP_LISTEN) 
3362         {
3363                 sk->err = EINVAL;
3364                 return(NULL); 
3365         }
3366 
3367         /* Avoid the race. */
3368         cli();
3369         sk->inuse = 1;
3370 
3371         while((skb = skb_dequeue(&sk->receive_queue)) == NULL) 
3372         {
3373                 if (flags & O_NONBLOCK) 
3374                 {
3375                         sti();
3376                         release_sock(sk);
3377                         sk->err = EAGAIN;
3378                         return(NULL);
3379                 }
3380 
3381                 release_sock(sk);
3382                 interruptible_sleep_on(sk->sleep);
3383                 if (current->signal & ~current->blocked) 
3384                 {
3385                         sti();
3386                         sk->err = ERESTARTSYS;
3387                         return(NULL);
3388                 }
3389                 sk->inuse = 1;
3390         }
3391         sti();
3392 
3393         /*
3394          *      Now all we need to do is return skb->sk. 
3395          */
3396 
3397         newsk = skb->sk;
3398 
3399         kfree_skb(skb, FREE_READ);
3400         sk->ack_backlog--;
3401         release_sock(sk);
3402         return(newsk);
3403 }
3404 
3405 
3406 /*
3407  *      This will initiate an outgoing connection. 
3408  */
3409  
3410 static int tcp_connect(struct sock *sk, struct sockaddr_in *usin, int addr_len)
     /* [previous][next][first][last][top][bottom][index][help] */
3411 {
3412         struct sk_buff *buff;
3413         struct device *dev=NULL;
3414         unsigned char *ptr;
3415         int tmp;
3416         struct tcphdr *t1;
3417         struct rtable *rt;
3418 
3419         if (sk->state != TCP_CLOSE) 
3420                 return(-EISCONN);
3421 
3422         if (addr_len < 8) 
3423                 return(-EINVAL);
3424 
3425         if (usin->sin_family && usin->sin_family != AF_INET) 
3426                 return(-EAFNOSUPPORT);
3427 
3428         /*
3429          *      connect() to INADDR_ANY means loopback (BSD'ism).
3430          */
3431         
3432         if(usin->sin_addr.s_addr==INADDR_ANY)
3433                 usin->sin_addr.s_addr=ip_my_addr();
3434                   
3435         /*
3436          *      Don't want a TCP connection going to a broadcast address 
3437          */
3438 
3439         if (ip_chk_addr(usin->sin_addr.s_addr) == IS_BROADCAST) 
3440         { 
3441                 return -ENETUNREACH;
3442         }
3443   
3444         /*
3445          *      Connect back to the same socket: Blows up so disallow it 
3446          */
3447 
3448         if(sk->saddr == usin->sin_addr.s_addr && sk->num==ntohs(usin->sin_port))
3449                 return -EBUSY;
3450 
3451         sk->inuse = 1;
3452         sk->daddr = usin->sin_addr.s_addr;
3453         sk->write_seq = jiffies * SEQ_TICK - seq_offset;
3454         sk->window_seq = sk->write_seq;
3455         sk->rcv_ack_seq = sk->write_seq -1;
3456         sk->err = 0;
3457         sk->dummy_th.dest = usin->sin_port;
3458         release_sock(sk);
3459 
3460         buff = sk->prot->wmalloc(sk,MAX_SYN_SIZE,0, GFP_KERNEL);
3461         if (buff == NULL) 
3462         {
3463                 return(-ENOMEM);
3464         }
3465         sk->inuse = 1;
3466         buff->len = 24;
3467         buff->sk = sk;
3468         buff->free = 1;
3469         buff->localroute = sk->localroute;
3470         
3471         t1 = (struct tcphdr *) buff->data;
3472 
3473         /*
3474          *      Put in the IP header and routing stuff. 
3475          */
3476          
3477         rt=ip_rt_route(sk->daddr, NULL, NULL);
3478         
3479 
3480         /*
3481          *      We need to build the routing stuff fromt the things saved in skb. 
3482          */
3483 
3484         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
3485                                         IPPROTO_TCP, NULL, MAX_SYN_SIZE,sk->ip_tos,sk->ip_ttl);
3486         if (tmp < 0) 
3487         {
3488                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
3489                 release_sock(sk);
3490                 return(-ENETUNREACH);
3491         }
3492 
3493         buff->len += tmp;
3494         t1 = (struct tcphdr *)((char *)t1 +tmp);
3495 
3496         memcpy(t1,(void *)&(sk->dummy_th), sizeof(*t1));
3497         t1->seq = ntohl(sk->write_seq++);
3498         sk->sent_seq = sk->write_seq;
3499         buff->h.seq = sk->write_seq;
3500         t1->ack = 0;
3501         t1->window = 2;
3502         t1->res1=0;
3503         t1->res2=0;
3504         t1->rst = 0;
3505         t1->urg = 0;
3506         t1->psh = 0;
3507         t1->syn = 1;
3508         t1->urg_ptr = 0;
3509         t1->doff = 6;
3510         /* use 512 or whatever user asked for */
3511 
3512         if (sk->user_mss)
3513                 sk->mtu = sk->user_mss;
3514         else if(rt!=NULL && rt->rt_flags&RTF_MTU)
3515                 sk->mtu = rt->rt_mtu;
3516         else 
3517         {
3518 #ifdef CONFIG_INET_SNARL
3519                 if ((sk->saddr ^ sk->daddr) & default_mask(sk->saddr))
3520 #else
3521                 if ((sk->saddr ^ sk->daddr) & dev->pa_mask)
3522 #endif
3523                         sk->mtu = 576 - HEADER_SIZE;
3524                 else
3525                         sk->mtu = MAX_WINDOW;
3526         }
3527         /*
3528          *      but not bigger than device MTU 
3529          */
3530 
3531         if(sk->mtu <32)
3532                 sk->mtu = 32;   /* Sanity limit */
3533                 
3534         sk->mtu = min(sk->mtu, dev->mtu - HEADER_SIZE);
3535         
3536         /*
3537          *      Put in the TCP options to say MTU. 
3538          */
3539 
3540         ptr = (unsigned char *)(t1+1);
3541         ptr[0] = 2;
3542         ptr[1] = 4;
3543         ptr[2] = (sk->mtu) >> 8;
3544         ptr[3] = (sk->mtu) & 0xff;
3545         tcp_send_check(t1, sk->saddr, sk->daddr,
3546                   sizeof(struct tcphdr) + 4, sk);
3547 
3548         /*
3549          *      This must go first otherwise a really quick response will get reset. 
3550          */
3551 
3552         sk->state = TCP_SYN_SENT;
3553 /*      sk->rtt = TCP_CONNECT_TIME;*/
3554         sk->rto = TCP_TIMEOUT_INIT;
3555         reset_timer(sk, TIME_WRITE, sk->rto);   /* Timer for repeating the SYN until an answer */
3556         sk->retransmits = TCP_RETR2 - TCP_SYN_RETRIES;
3557 
3558         sk->prot->queue_xmit(sk, dev, buff, 0);  
3559         tcp_statistics.TcpActiveOpens++;
3560         tcp_statistics.TcpOutSegs++;
3561   
3562         release_sock(sk);
3563         return(0);
3564 }
3565 
3566 
3567 /* This functions checks to see if the tcp header is actually acceptable. */
3568 static int
3569 tcp_sequence(struct sock *sk, struct tcphdr *th, short len,
     /* [previous][next][first][last][top][bottom][index][help] */
3570              struct options *opt, unsigned long saddr, struct device *dev)
3571 {
3572         unsigned long next_seq;
3573 
3574         next_seq = len - 4*th->doff;
3575         if (th->fin)
3576                 next_seq++;
3577         /* if we have a zero window, we can't have any data in the packet.. */
3578         if (next_seq && !sk->window)
3579                 goto ignore_it;
3580         next_seq += th->seq;
3581 
3582         /*
3583          * This isn't quite right.  sk->acked_seq could be more recent
3584          * than sk->window.  This is however close enough.  We will accept
3585          * slightly more packets than we should, but it should not cause
3586          * problems unless someone is trying to forge packets.
3587          */
3588 
3589         /* have we already seen all of this packet? */
3590         if (!after(next_seq+1, sk->acked_seq))
3591                 goto ignore_it;
3592         /* or does it start beyond the window? */
3593         if (!before(th->seq, sk->acked_seq + sk->window + 1))
3594                 goto ignore_it;
3595 
3596         /* ok, at least part of this packet would seem interesting.. */
3597         return 1;
3598 
3599 ignore_it:
3600         if (th->rst)
3601                 return 0;
3602 
3603         /*
3604          *      Send a reset if we get something not ours and we are
3605          *      unsynchronized. Note: We don't do anything to our end. We
3606          *      are just killing the bogus remote connection then we will
3607          *      connect again and it will work (with luck).
3608          */
3609          
3610         if (sk->state==TCP_SYN_SENT || sk->state==TCP_SYN_RECV) {
3611                 tcp_reset(sk->saddr,sk->daddr,th,sk->prot,NULL,dev, sk->ip_tos,sk->ip_ttl);
3612                 return 1;
3613         }
3614 
3615         /* Try to resync things. */
3616         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th, saddr);
3617         return 0;
3618 }
3619 
3620 
3621 #ifdef TCP_FASTPATH
3622 /*
3623  *      Is the end of the queue clear of fragments as yet unmerged into the data stream
3624  *      Yes if
3625  *      a) The queue is empty
3626  *      b) The last frame on the queue has the acked flag set
3627  */
3628 
3629 static inline int tcp_clean_end(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
3630 {
3631         struct sk_buff *skb=skb_peek(&sk->receive_queue);
3632         if(skb==NULL || sk->receive_queue.prev->acked)
3633                 return 1;
3634 }
3635 
3636 #endif
3637 
3638 int
3639 tcp_rcv(struct sk_buff *skb, struct device *dev, struct options *opt,
     /* [previous][next][first][last][top][bottom][index][help] */
3640         unsigned long daddr, unsigned short len,
3641         unsigned long saddr, int redo, struct inet_protocol * protocol)
3642 {
3643         struct tcphdr *th;
3644         struct sock *sk;
3645 
3646         if (!skb) 
3647         {
3648                 return(0);
3649         }
3650 
3651         if (!dev) 
3652         {
3653                 return(0);
3654         }
3655   
3656         tcp_statistics.TcpInSegs++;
3657   
3658         if(skb->pkt_type!=PACKET_HOST)
3659         {
3660                 kfree_skb(skb,FREE_READ);
3661                 return(0);
3662         }
3663   
3664         th = skb->h.th;
3665 
3666         /*
3667          *      Find the socket.
3668          */
3669 
3670         sk = get_sock(&tcp_prot, th->dest, saddr, th->source, daddr);
3671 
3672         /*
3673          *      If this socket has got a reset its to all intents and purposes 
3674          *      really dead 
3675          */
3676          
3677         if (sk!=NULL && sk->zapped)
3678                 sk=NULL;
3679 
3680         if (!redo) 
3681         {
3682                 if (tcp_check(th, len, saddr, daddr )) 
3683                 {
3684                         skb->sk = NULL;
3685                         kfree_skb(skb,FREE_READ);
3686                         /*
3687                          * We don't release the socket because it was
3688                          * never marked in use.
3689                          */
3690                         return(0);
3691                 }
3692                 th->seq = ntohl(th->seq);
3693 
3694                 /* See if we know about the socket. */
3695                 if (sk == NULL) 
3696                 {
3697                         if (!th->rst)
3698                                 tcp_reset(daddr, saddr, th, &tcp_prot, opt,dev,skb->ip_hdr->tos,255);
3699                         skb->sk = NULL;
3700                         kfree_skb(skb, FREE_READ);
3701                         return(0);
3702                 }
3703 
3704                 skb->len = len;
3705                 skb->sk = sk;
3706                 skb->acked = 0;
3707                 skb->used = 0;
3708                 skb->free = 0;
3709                 skb->saddr = daddr;
3710                 skb->daddr = saddr;
3711         
3712                 /* We may need to add it to the backlog here. */
3713                 cli();
3714                 if (sk->inuse) 
3715                 {
3716                         skb_queue_head(&sk->back_log, skb);
3717                         sti();
3718                         return(0);
3719                 }
3720                 sk->inuse = 1;
3721                 sti();
3722         }
3723         else
3724         {
3725                 if (!sk) 
3726                 {
3727                         return(0);
3728                 }
3729         }
3730 
3731 
3732         if (!sk->prot) 
3733         {
3734                 return(0);
3735         }
3736 
3737 
3738         /*
3739          *      Charge the memory to the socket. 
3740          */
3741          
3742         if (sk->rmem_alloc + skb->mem_len >= sk->rcvbuf) 
3743         {
3744                 skb->sk = NULL;
3745                 kfree_skb(skb, FREE_READ);
3746                 release_sock(sk);
3747                 return(0);
3748         }
3749 
3750         sk->rmem_alloc += skb->mem_len;
3751 
3752 #ifdef TCP_FASTPATH
3753 /*
3754  *      Incoming data stream fastpath. 
3755  *
3756  *      We try to optimise two things.
3757  *      1) Spot general data arriving without funny options and skip extra checks and the switch.
3758  *      2) Spot the common case in raw data receive streams of a packet that has no funny options,
3759  *      fits exactly on the end of the current queue and may or may not have the ack bit set.
3760  *
3761  *      Case two especially is done inline in this routine so there are no long jumps causing heavy
3762  *      cache thrashing, no function call overhead (except for the ack sending if needed) and for
3763  *      speed although further optimizing here is possible.
3764  */
3765  
3766         /* Im trusting gcc to optimise this sensibly... might need judicious application of a software mallet */
3767         if(!(sk->shutdown & RCV_SHUTDOWN) && sk->state==TCP_ESTABLISHED && !th->urg && !th->syn && !th->fin && !th->rst && !th->urg)
3768         {       
3769                 /* Packets in order. Fits window */
3770                 if(th->seq == sk->acked_seq+1 && sk->window && tcp_clean_end(sk))
3771                 {
3772                         /* Ack is harder */
3773                         if(th->ack && !tcp_ack(sk, th, saddr, len))
3774                         {
3775                                 kfree_skb(skb, FREE_READ);
3776                                 release_sock(sk);
3777                                 return 0;
3778                         }
3779                         /*
3780                          *      Set up variables
3781                          */
3782                         skb->len -= (th->doff *4);
3783                         sk->bytes_rcv += skb->len;
3784                         tcp_rx_hit2++;
3785                         if(skb->len)
3786                         {
3787                                 skb_queue_tail(&sk->receive_queue,skb); /* We already know where to put it */
3788                                 if(sk->window >= skb->len)
3789                                         sk->window-=skb->len;                   /* We know its effect on the window */
3790                                 else
3791                                         sk->window=0;
3792                                 sk->acked_seq = th->ack_seq;            /* Easy */
3793                                 skb->acked=1;                           /* Guaranteed true */
3794                                 if(!sk->delay_acks || sk->ack_backlog >= sk->max_ack_backlog || 
3795                                         sk->bytes_rcv > sk->max_unacked)
3796                                 {
3797                                         tcp_send_ack(sk->sent_seq, sk->acked_seq, sk, th , saddr);
3798                                 }
3799                                 else
3800                                 {
3801                                         sk->ack_backlog++;
3802                                         reset_timer(sk, TIME_WRITE, TCP_ACK_TIME);
3803                                 }
3804                                 if(!sk->dead)
3805                                         sk->data_ready(sk,0);
3806                                 return 0;
3807                         }
3808                 }
3809                 /*
3810                  *      More generic case of arriving data stream in ESTABLISHED
3811                  */
3812                 tcp_rx_hit1++;
3813                 if(!tcp_sequence(sk, th, len, opt, saddr, dev))
3814                 {
3815                         kfree_skb(skb, FREE_READ);
3816                         release_sock(sk);
3817                         return 0;
3818                 }
3819                 if(th->ack && !tcp_ack(sk, th, saddr, len))
3820                 {
3821                         kfree_skb(skb, FREE_READ);
3822                         release_sock(sk);
3823                         return 0;
3824                 }
3825                 if(tcp_data(skb, sk, saddr, len))
3826                         kfree_skb(skb, FREE_READ);
3827                 release_sock(sk);
3828                 return 0;
3829         }
3830         tcp_rx_miss++;
3831 #endif  
3832 
3833         /*
3834          *      Now deal with all cases.
3835          */
3836          
3837         switch(sk->state) 
3838         {
3839         
3840                 /*
3841                  * This should close the system down if it's waiting
3842                  * for an ack that is never going to be sent.
3843                  */
3844                 case TCP_LAST_ACK:
3845                         if (th->rst) 
3846                         {
3847                                 sk->zapped=1;
3848                                 sk->err = ECONNRESET;
3849                                 sk->state = TCP_CLOSE;
3850                                 sk->shutdown = SHUTDOWN_MASK;
3851                                 if (!sk->dead) 
3852                                 {
3853                                         sk->state_change(sk);
3854                                 }
3855                                 kfree_skb(skb, FREE_READ);
3856                                 release_sock(sk);
3857                                 return(0);
3858                         }
3859 
3860                 case TCP_ESTABLISHED:
3861                 case TCP_CLOSE_WAIT:
3862                 case TCP_CLOSING:
3863                 case TCP_FIN_WAIT1:
3864                 case TCP_FIN_WAIT2:
3865                 case TCP_TIME_WAIT:
3866                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3867                         {
3868                                 kfree_skb(skb, FREE_READ);
3869                                 release_sock(sk);
3870                                 return(0);
3871                         }
3872 
3873                         if (th->rst) 
3874                         {
3875                                 tcp_statistics.TcpEstabResets++;
3876                                 tcp_statistics.TcpCurrEstab--;
3877                                 sk->zapped=1;
3878                                 /* This means the thing should really be closed. */
3879                                 sk->err = ECONNRESET;
3880                                 if (sk->state == TCP_CLOSE_WAIT) 
3881                                 {
3882                                         sk->err = EPIPE;
3883                                 }
3884         
3885                                 /*
3886                                  * A reset with a fin just means that
3887                                  * the data was not all read.
3888                                  */
3889                                 sk->state = TCP_CLOSE;
3890                                 sk->shutdown = SHUTDOWN_MASK;
3891                                 if (!sk->dead) 
3892                                 {
3893                                         sk->state_change(sk);
3894                                 }
3895                                 kfree_skb(skb, FREE_READ);
3896                                 release_sock(sk);
3897                                 return(0);
3898                         }
3899                         if (th->syn) 
3900                         {
3901                                 tcp_statistics.TcpCurrEstab--;
3902                                 tcp_statistics.TcpEstabResets++;
3903                                 sk->err = ECONNRESET;
3904                                 sk->state = TCP_CLOSE;
3905                                 sk->shutdown = SHUTDOWN_MASK;
3906                                 tcp_reset(daddr, saddr,  th, sk->prot, opt,dev, sk->ip_tos,sk->ip_ttl);
3907                                 if (!sk->dead) {
3908                                         sk->state_change(sk);
3909                                 }
3910                                 kfree_skb(skb, FREE_READ);
3911                                 release_sock(sk);
3912                                 return(0);
3913                         }
3914         
3915                         if (th->ack && !tcp_ack(sk, th, saddr, len)) {
3916                                 kfree_skb(skb, FREE_READ);
3917                                 release_sock(sk);
3918                                 return(0);
3919                         }
3920         
3921                         if (tcp_urg(sk, th, saddr, len)) {
3922                                 kfree_skb(skb, FREE_READ);
3923                                 release_sock(sk);
3924                                 return(0);
3925                         }
3926 
3927                         if (th->fin && tcp_fin(skb, sk, th, saddr, dev)) {
3928                                 kfree_skb(skb, FREE_READ);
3929                                 release_sock(sk);
3930                                 return(0);
3931                         }
3932         
3933                         if (tcp_data(skb, sk, saddr, len)) {
3934                                 kfree_skb(skb, FREE_READ);
3935                                 release_sock(sk);
3936                                 return(0);
3937                         }       
3938         
3939                         release_sock(sk);
3940                         return(0);
3941                 
3942                 case TCP_CLOSE:
3943                         if (sk->dead || sk->daddr) {
3944                                 kfree_skb(skb, FREE_READ);
3945                                         release_sock(sk);
3946                                 return(0);
3947                         }
3948         
3949                         if (!th->rst) {
3950                                 if (!th->ack)
3951                                         th->ack_seq = 0;
3952                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3953                         }
3954                         kfree_skb(skb, FREE_READ);
3955                         release_sock(sk);
3956                                 return(0);
3957         
3958                 case TCP_LISTEN:
3959                         if (th->rst) {
3960                                 kfree_skb(skb, FREE_READ);
3961                                 release_sock(sk);
3962                                 return(0);
3963                         }
3964                         if (th->ack) {
3965                                 tcp_reset(daddr, saddr, th, sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
3966                                 kfree_skb(skb, FREE_READ);
3967                                 release_sock(sk);
3968                                 return(0);
3969                         }
3970         
3971                         if (th->syn) 
3972                         {
3973                                 /*
3974                                  * Now we just put the whole thing including
3975                                  * the header and saddr, and protocol pointer
3976                                  * into the buffer.  We can't respond until the
3977                                  * user tells us to accept the connection.
3978                                  */
3979                                 tcp_conn_request(sk, skb, daddr, saddr, opt, dev);
3980                                 release_sock(sk);
3981                                 return(0);
3982                         }
3983 
3984                         kfree_skb(skb, FREE_READ);
3985                         release_sock(sk);
3986                         return(0);
3987 
3988                 case TCP_SYN_RECV:
3989                         if (th->syn) {
3990                                 /* Probably a retransmitted syn */
3991                                 kfree_skb(skb, FREE_READ);
3992                                 release_sock(sk);
3993                                 return(0);
3994                         }
3995         
3996         
3997                 default:
3998                         if (!tcp_sequence(sk, th, len, opt, saddr,dev)) 
3999                         {
4000                                 kfree_skb(skb, FREE_READ);
4001                                 release_sock(sk);
4002                                 return(0);
4003                         }
4004         
4005                 case TCP_SYN_SENT:
4006                         if (th->rst) 
4007                         {
4008                                 tcp_statistics.TcpAttemptFails++;
4009                                 sk->err = ECONNREFUSED;
4010                                 sk->state = TCP_CLOSE;
4011                                 sk->shutdown = SHUTDOWN_MASK;
4012                                 sk->zapped = 1;
4013                                 if (!sk->dead) 
4014                                 {
4015                                         sk->state_change(sk);
4016                                 }
4017                                 kfree_skb(skb, FREE_READ);
4018                                 release_sock(sk);
4019                                 return(0);
4020                         }
4021                         if (!th->ack) 
4022                         {
4023                                 if (th->syn) 
4024                                 {
4025                                         sk->state = TCP_SYN_RECV;
4026                                 }
4027                                 kfree_skb(skb, FREE_READ);
4028                                 release_sock(sk);
4029                                 return(0);
4030                         }
4031         
4032                         switch(sk->state) 
4033                         {
4034                                 case TCP_SYN_SENT:
4035                                         if (!tcp_ack(sk, th, saddr, len)) 
4036                                         {
4037                                                 tcp_statistics.TcpAttemptFails++;
4038                                                 tcp_reset(daddr, saddr, th,
4039                                                         sk->prot, opt,dev,sk->ip_tos,sk->ip_ttl);
4040                                                 kfree_skb(skb, FREE_READ);
4041                                                         release_sock(sk);
4042                                                 return(0);
4043                                         }
4044         
4045                                         /*
4046                                          * If the syn bit is also set, switch to
4047                                          * tcp_syn_recv, and then to established.
4048                                          */
4049                                         if (!th->syn) 
4050                                         {
4051                                                 kfree_skb(skb, FREE_READ);
4052                                                 release_sock(sk);
4053                                                 return(0);
4054                                         }
4055         
4056                                         /* Ack the syn and fall through. */
4057                                         sk->acked_seq = th->seq+1;
4058                                         sk->fin_seq = th->seq;
4059                                         tcp_send_ack(sk->sent_seq, th->seq+1,
4060                                                 sk, th, sk->daddr);
4061                 
4062                                 case TCP_SYN_RECV:
4063                                         if (!tcp_ack(sk, th, saddr, len)) 
4064                                         {
4065                                                 tcp_statistics.TcpAttemptFails++;
4066                                                 tcp_reset(daddr, saddr, th,
4067                                                         sk->prot, opt, dev,sk->ip_tos,sk->ip_ttl);
4068                                                 kfree_skb(skb, FREE_READ);
4069                                                 release_sock(sk);
4070                                                 return(0);
4071                                         }
4072         
4073                                         tcp_statistics.TcpCurrEstab++;
4074                                         sk->state = TCP_ESTABLISHED;
4075         
4076                                         /*
4077                                          *      Now we need to finish filling out
4078                                          *      some of the tcp header.
4079                                          * 
4080                                          *      We need to check for mtu info. 
4081                                          */
4082                                         tcp_options(sk, th);
4083                                         sk->dummy_th.dest = th->source;
4084                                         sk->copied_seq = sk->acked_seq-1;
4085                                         if (!sk->dead) 
4086                                         {
4087                                                 sk->state_change(sk);
4088                                         }
4089         
4090                                         /*
4091                                          * We've already processed his first
4092                                          * ack.  In just about all cases that
4093                                          * will have set max_window.  This is
4094                                          * to protect us against the possibility
4095                                          * that the initial window he sent was 0.
4096                                          * This must occur after tcp_options, which
4097                                          * sets sk->mtu.
4098                                          */
4099                                         if (sk->max_window == 0) 
4100                                         {
4101                                                 sk->max_window = 32;
4102                                                 sk->mss = min(sk->max_window, sk->mtu);
4103                                         }
4104 
4105                                         /*
4106                                          * Now process the rest like we were
4107                                          * already in the established state.
4108                                          */
4109                                         if (th->urg) 
4110                                         {
4111                                                 if (tcp_urg(sk, th, saddr, len)) 
4112                                                 { 
4113                                                         kfree_skb(skb, FREE_READ);
4114                                                         release_sock(sk);
4115                                                         return(0);
4116                                                 }
4117                                         }
4118                                         if (tcp_data(skb, sk, saddr, len))
4119                                                 kfree_skb(skb, FREE_READ);
4120 
4121                                         if (th->fin)
4122                                                 tcp_fin(skb, sk, th, saddr, dev);
4123                                         release_sock(sk);
4124                                         return(0);
4125                         }
4126         
4127                         if (th->urg) 
4128                         {
4129                                 if (tcp_urg(sk, th, saddr, len)) 
4130                                 {
4131                                         kfree_skb(skb, FREE_READ);
4132                                         release_sock(sk);
4133                                         return(0);
4134                                 }
4135                         }
4136                         if (tcp_data(skb, sk, saddr, len)) 
4137                         {
4138                                 kfree_skb(skb, FREE_READ);
4139                                 release_sock(sk);
4140                                 return(0);
4141                         }
4142         
4143                         if (!th->fin) 
4144                         {
4145                                 release_sock(sk);
4146                                 return(0);
4147                         }
4148                         tcp_fin(skb, sk, th, saddr, dev);
4149                         release_sock(sk);
4150                         return(0);
4151         }
4152 }
4153 
4154 
4155 /*
4156  * This routine sends a packet with an out of date sequence
4157  * number. It assumes the other end will try to ack it.
4158  */
4159 
4160 static void tcp_write_wakeup(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4161 {
4162         struct sk_buff *buff;
4163         struct tcphdr *t1;
4164         struct device *dev=NULL;
4165         int tmp;
4166 
4167         if (sk->zapped)
4168                 return; /* Afer a valid reset we can send no more */
4169 
4170         /*
4171          * Write data can still be transmitted/retransmitted in the
4172          * following states.  If any other state is encountered, return.
4173          */
4174 
4175         if (sk->state != TCP_ESTABLISHED && 
4176             sk->state != TCP_CLOSE_WAIT &&
4177             sk->state != TCP_FIN_WAIT1 && 
4178             sk->state != TCP_LAST_ACK &&
4179             sk->state != TCP_CLOSING
4180         ) {
4181                 return;
4182         }
4183 
4184         buff = sk->prot->wmalloc(sk,MAX_ACK_SIZE,1, GFP_ATOMIC);
4185         if (buff == NULL) 
4186                 return;
4187 
4188         buff->len = sizeof(struct tcphdr);
4189         buff->free = 1;
4190         buff->sk = sk;
4191         buff->localroute = sk->localroute;
4192 
4193         t1 = (struct tcphdr *) buff->data;
4194 
4195         /* Put in the IP header and routing stuff. */
4196         tmp = sk->prot->build_header(buff, sk->saddr, sk->daddr, &dev,
4197                                 IPPROTO_TCP, sk->opt, MAX_ACK_SIZE,sk->ip_tos,sk->ip_ttl);
4198         if (tmp < 0) 
4199         {
4200                 sk->prot->wfree(sk, buff->mem_addr, buff->mem_len);
4201                 return;
4202         }
4203 
4204         buff->len += tmp;
4205         t1 = (struct tcphdr *)((char *)t1 +tmp);
4206 
4207         memcpy(t1,(void *) &sk->dummy_th, sizeof(*t1));
4208 
4209         /*
4210          * Use a previous sequence.
4211          * This should cause the other end to send an ack.
4212          */
4213         t1->seq = htonl(sk->sent_seq-1);
4214         t1->ack = 1; 
4215         t1->res1= 0;
4216         t1->res2= 0;
4217         t1->rst = 0;
4218         t1->urg = 0;
4219         t1->psh = 0;
4220         t1->fin = 0;
4221         t1->syn = 0;
4222         t1->ack_seq = ntohl(sk->acked_seq);
4223         t1->window = ntohs(tcp_select_window(sk)/*sk->prot->rspace(sk)*/);
4224         t1->doff = sizeof(*t1)/4;
4225         tcp_send_check(t1, sk->saddr, sk->daddr, sizeof(*t1), sk);
4226 
4227          /*     Send it and free it.
4228           *     This will prevent the timer from automatically being restarted.
4229           */
4230         sk->prot->queue_xmit(sk, dev, buff, 1);
4231         tcp_statistics.TcpOutSegs++;
4232 }
4233 
4234 void
4235 tcp_send_probe0(struct sock *sk)
     /* [previous][next][first][last][top][bottom][index][help] */
4236 {
4237         if (sk->zapped)
4238                 return;         /* Afer a valid reset we can send no more */
4239 
4240         tcp_write_wakeup(sk);
4241 
4242         sk->backoff++;
4243         sk->rto = min(sk->rto << 1, 120*HZ);
4244         reset_timer (sk, TIME_PROBE0, sk->rto);
4245         sk->retransmits++;
4246         sk->prot->retransmits ++;
4247 }
4248 
4249 /*
4250  *      Socket option code for TCP. 
4251  */
4252   
4253 int tcp_setsockopt(struct sock *sk, int level, int optname, char *optval, int optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4254 {
4255         int val,err;
4256 
4257         if(level!=SOL_TCP)
4258                 return ip_setsockopt(sk,level,optname,optval,optlen);
4259 
4260         if (optval == NULL) 
4261                 return(-EINVAL);
4262 
4263         err=verify_area(VERIFY_READ, optval, sizeof(int));
4264         if(err)
4265                 return err;
4266         
4267         val = get_fs_long((unsigned long *)optval);
4268 
4269         switch(optname)
4270         {
4271                 case TCP_MAXSEG:
4272 /*                      if(val<200||val>2048 || val>sk->mtu) */
4273 /*
4274  * values greater than interface MTU won't take effect.  however at
4275  * the point when this call is done we typically don't yet know
4276  * which interface is going to be used
4277  */
4278                         if(val<1||val>MAX_WINDOW)
4279                                 return -EINVAL;
4280                         sk->user_mss=val;
4281                         return 0;
4282                 case TCP_NODELAY:
4283                         sk->nonagle=(val==0)?0:1;
4284                         return 0;
4285                 default:
4286                         return(-ENOPROTOOPT);
4287         }
4288 }
4289 
4290 int tcp_getsockopt(struct sock *sk, int level, int optname, char *optval, int *optlen)
     /* [previous][next][first][last][top][bottom][index][help] */
4291 {
4292         int val,err;
4293 
4294         if(level!=SOL_TCP)
4295                 return ip_getsockopt(sk,level,optname,optval,optlen);
4296                         
4297         switch(optname)
4298         {
4299                 case TCP_MAXSEG:
4300                         val=sk->user_mss;
4301                         break;
4302                 case TCP_NODELAY:
4303                         val=sk->nonagle;        /* Until Johannes stuff is in */
4304                         break;
4305                 default:
4306                         return(-ENOPROTOOPT);
4307         }
4308         err=verify_area(VERIFY_WRITE, optlen, sizeof(int));
4309         if(err)
4310                 return err;
4311         put_fs_long(sizeof(int),(unsigned long *) optlen);
4312 
4313         err=verify_area(VERIFY_WRITE, optval, sizeof(int));
4314         if(err)
4315                 return err;
4316         put_fs_long(val,(unsigned long *)optval);
4317 
4318         return(0);
4319 }       
4320 
4321 
/*
 * Protocol descriptor for TCP.
 *
 * This is a purely positional initializer for struct proto, so every
 * entry is bound to a member by its position alone. The declaration of
 * struct proto is not part of this file; when members are added to or
 * reordered in it, this list must be updated in step.
 *
 * The sock_* and ip_* entries are helpers defined outside this file;
 * the tcp_* entries are the TCP routines defined in this file.
 */
4322 struct proto tcp_prot = {
4323         sock_wmalloc,
4324         sock_rmalloc,
4325         sock_wfree,
4326         sock_rfree,
4327         sock_rspace,
4328         sock_wspace,
4329         tcp_close,
4330         tcp_read,
4331         tcp_write,
4332         tcp_sendto,
4333         tcp_recvfrom,
4334         ip_build_header,
4335         tcp_connect,
4336         tcp_accept,
4337         ip_queue_xmit,
4338         tcp_retransmit,
4339         tcp_write_wakeup,
4340         tcp_read_wakeup,
4341         tcp_rcv,
4342         tcp_select,
4343         tcp_ioctl,
4344         NULL,                   /* NOTE(review): slot left without a handler; check struct proto for which operation this is */
4345         tcp_shutdown,
4346         tcp_setsockopt,
4347         tcp_getsockopt,
4348         128,                    /* NOTE(review): presumably the maximum header size; confirm against struct proto */
4349         0,                      /* NOTE(review): presumably the retransmits counter bumped in tcp_send_probe0(); confirm */
4350         {NULL,},                /* NOTE(review): array member starting out empty; presumably the per-protocol socket table, confirm */
4351         "TCP"                   /* NOTE(review): presumably the protocol name string; confirm */
4352 };

/* [previous][next][first][last][top][bottom][index][help] */